You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2015/09/10 09:50:49 UTC

[05/13] any23 git commit: adding HEvent and HProduct Extractors

adding HEvent and HProduct Extractors


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/cc0dfbe8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/cc0dfbe8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/cc0dfbe8

Branch: refs/heads/master
Commit: cc0dfbe8127a00fa712c7d2df6785a73c290feae
Parents: 1616c17
Author: Nisala <ni...@gmail.com>
Authored: Mon Jul 20 01:12:27 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Mon Jul 20 01:12:27 2015 +0530

----------------------------------------------------------------------
 .../java/org/apache/any23/vocab/HEvent.java     |  57 ++++++
 .../java/org/apache/any23/vocab/HProduct.java   |  58 ++++++
 .../html/microformats2/HEventExtractor.java     | 195 +++++++++++++++++++
 .../microformats2/HEventExtractorFactory.java   |  57 ++++++
 .../html/microformats2/HProductExtractor.java   | 153 +++++++++++++++
 .../microformats2/HProductExtractorFactory.java |  56 ++++++
 .../apache/any23/prefixes/prefixes.properties   |   2 +
 .../html/microformats2/HEventExtractorTest.java |  37 ++++
 .../microformats2/HProductExtractorTest.java    |  37 ++++
 .../microformats2/h-event/h-event-test.html     |  36 ++++
 .../microformats2/h-product/h-product-test.html |  36 ++++
 11 files changed, 724 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HEvent.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HEvent.java b/api/src/main/java/org/apache/any23/vocab/HEvent.java
new file mode 100644
index 0000000..b936c3e
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HEvent.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-event">h-event</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEvent extends Vocabulary {
+    /** Term namespace. NOTE(review): confirm it equals the "hevent" entry of prefixes.properties. */
+    public static final String NS = SINDICE.NS + "hevent/";
+    private static HEvent instance;
+
+    /** Returns the shared instance, creating it on the first call (no synchronization is done). */
+    public static HEvent getInstance() {
+        if (instance == null) {
+            instance = new HEvent();
+        }
+        return instance;
+    }
+
+    /** Term created with {@code createClass}; the extractor uses it as the type of an event. */
+    public final URI event = createClass(NS, "Event");
+    // Terms created with createProperty; final so the shared instance cannot be reassigned.
+    public final URI name = createProperty(NS, "name");
+    public final URI summary = createProperty(NS, "summary");
+    public final URI start = createProperty(NS, "start");
+    public final URI end = createProperty(NS, "end");
+    public final URI duration = createProperty(NS, "duration");
+    public final URI description = createProperty(NS, "description");
+    public final URI url = createProperty(NS, "url");
+    public final URI category = createProperty(NS, "category");
+    public final URI location = createProperty(NS, "location");
+    public final URI attendee = createProperty(NS, "attendee");
+
+    private HEvent() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HProduct.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HProduct.java b/api/src/main/java/org/apache/any23/vocab/HProduct.java
new file mode 100644
index 0000000..9630db3
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HProduct.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-product">h-product</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+
+public class HProduct extends Vocabulary {
+    /** Term namespace. NOTE(review): confirm it equals the "hproduct" entry of prefixes.properties. */
+    public static final String NS = SINDICE.NS + "hproduct/";
+
+    private static HProduct instance;
+
+    /** Returns the shared instance, creating it on the first call (no synchronization is done). */
+    public static HProduct getInstance() {
+        if (instance == null) {
+            instance = new HProduct();
+        }
+        return instance;
+    }
+
+    /** Term created with {@code createClass}; the extractor uses it as the type of a product. */
+    public final URI product = createClass(NS, "Product");
+    // Terms created with createProperty; final so the shared instance cannot be reassigned.
+    public final URI name = createProperty(NS, "name");
+    public final URI photo = createProperty(NS, "photo");
+    public final URI brand = createProperty(NS, "brand");
+    public final URI category = createProperty(NS, "category");
+    public final URI description = createProperty(NS, "description");
+    public final URI url = createProperty(NS, "url");
+    public final URI identifier = createProperty(NS, "identifier");
+    public final URI price = createProperty(NS, "price");
+    public final URI review = createProperty(NS, "review");
+
+    private HProduct() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
new file mode 100644
index 0000000..8ce70a6
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.vocab.HEvent;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+import static org.apache.any23.extractor.html.HTMLDocument.TextField;
+
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-event">h-event</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEventExtractor extends EntityBasedMicroformatExtractor {
+
+    /** Vocabulary supplying the RDF terms emitted by this extractor. */
+    private static final HEvent vEvent = HEvent.getInstance();
+
+    /**
+     * Un-prefixed h-event property names, addressed by index from the {@code add*}
+     * methods below. "attendee" is listed but no method extracts it yet.
+     */
+    private static final String[] eventFields = {
+            "name",
+            "summary",
+            "start",
+            "end",
+            "duration",
+            "description",
+            "url",
+            "category",
+            "location", //toDO
+            "attendee" //toDO
+    };
+
+    /** Returns the description instance held by {@link HEventExtractorFactory}. */
+    @Override
+    public ExtractorDescription getDescription() {
+        return HEventExtractorFactory.getDescriptionInstance();
+    }
+
+    /** Returns {@code CLASS_PREFIX + "event"}, the base class name of this microformat. */
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"event";
+    }
+
+    /** Does nothing: this class declares no instance state to reset. */
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    /**
+     * Maps one h-event element to a blank node typed as {@code vEvent.event} and
+     * runs every {@code add*} method of this class against it.
+     *
+     * @param node root element of the entity being extracted
+     * @param out extraction result; not referenced directly by this method
+     * @return always {@code true}
+     * @throws ExtractionException propagated from {@code addURLs}
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode event = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(event, RDF.TYPE, vEvent.event);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, event);
+        addSummary(fragment, event);
+        addStart(fragment, event);
+        addEnd(fragment, event);
+        addDuration(fragment, event);
+        addDescription(fragment, event);
+        addURLs(fragment, event);
+        addCategories(fragment, event);
+        addLocation(fragment, event);
+        return true;
+    }
+
+    /**
+     * Reads the singular text field with class {@code fieldClass} and hands its
+     * source node and value to {@code conditionallyAddStringProperty}.
+     */
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode event, String fieldClass,
+                                      URI property) {
+        final TextField field = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(field.source(), event, property, field.value());
+    }
+
+    /**
+     * Shared logic of the start, end and duration fields. Reads the singular field
+     * {@code TIME_PROPERTY_PREFIX + eventFields[fieldIndex]}; returns without adding
+     * anything when it has no source node. Otherwise the {@code datetime} attribute
+     * of the source is preferred and the field's own value is the fallback.
+     */
+    private void addDateTimeProperty(HTMLDocument fragment, BNode event, int fieldIndex,
+                                     URI property) {
+        final TextField field = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[fieldIndex]);
+        final Node source = field.source();
+        if (source == null) {
+            return;
+        }
+        final Node datetime = source.getAttributes().getNamedItem("datetime");
+        final String value = datetime == null ? field.value() : datetime.getNodeValue();
+        conditionallyAddStringProperty(source, event, property, value);
+    }
+
+    /** Maps the name field (index 0) to {@code vEvent.name}. */
+    private void addName(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[0], vEvent.name);
+    }
+
+    /** Maps the summary field (index 1) to {@code vEvent.summary}. */
+    private void addSummary(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[1], vEvent.summary);
+    }
+
+    /** Maps the start field (index 2) to {@code vEvent.start} via addDateTimeProperty. */
+    private void addStart(HTMLDocument fragment, BNode event) {
+        addDateTimeProperty(fragment, event, 2, vEvent.start);
+    }
+
+    /** Maps the end field (index 3) to {@code vEvent.end} via addDateTimeProperty. */
+    private void addEnd(HTMLDocument fragment, BNode event) {
+        addDateTimeProperty(fragment, event, 3, vEvent.end);
+    }
+
+    /** Maps the duration field (index 4) to {@code vEvent.duration} via addDateTimeProperty. */
+    private void addDuration(HTMLDocument fragment, BNode event) {
+        addDateTimeProperty(fragment, event, 4, vEvent.duration);
+    }
+
+    /** Maps the description field (index 5) to {@code vEvent.description}. */
+    private void addDescription(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[5], vEvent.description);
+    }
+
+    /** Adds one {@code vEvent.url} statement per resolved value of the plural url field. */
+    private void addURLs(HTMLDocument fragment, BNode event) throws ExtractionException {
+        final TextField[] urls = fragment.getPluralUrlField(
+                Microformats2Prefixes.URL_PROPERTY_PREFIX + eventFields[6]);
+        for (TextField url : urls) {
+            addURIProperty(event, vEvent.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    /** Hands every value of the plural category field to conditionallyAddStringProperty. */
+    private void addCategories(HTMLDocument fragment, BNode event) {
+        final TextField[] categories = fragment.getPluralTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + eventFields[7]);
+        for (TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), event, vEvent.category, category.value()
+            );
+        }
+    }
+
+    /** Maps the location field (index 8) to {@code vEvent.location} through mapFieldWithProperty. */
+    private void addLocation(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[8], vEvent.location);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
new file mode 100644
index 0000000..602b044
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Peter Ansell p_ansell@yahoo.com
+ *
+ */
+public class HEventExtractorFactory extends SimpleExtractorFactory<HEventExtractor> implements
+        ExtractorFactory<HEventExtractor> {
+    // Extractor name and prefix subset handed to the superclass constructor.
+    public static final String NAME = "html-mf2-h-event";
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hevent");
+
+    // Must stay below PREFIXES: the constructor reads it during static initialisation.
+    private static final ExtractorDescription descriptionInstance = new HEventExtractorFactory();
+
+    /** Passes the name, prefixes, two MIME-type strings and an example file name to the superclass. */
+    public HEventExtractorFactory() {
+        super(NAME, PREFIXES, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-event.html");
+    }
+
+    /** Returns a new {@link HEventExtractor}. */
+    @Override
+    public HEventExtractor createExtractor() {
+        return new HEventExtractor();
+    }
+
+    /** Returns the factory instance created once during class initialisation. */
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
new file mode 100644
index 0000000..0e93935
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.vocab.HProduct;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-product">h-product</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HProductExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HProduct vProduct = HProduct.getInstance();
+
+    private static final String[] productFields = {
+            "name",
+            "photo",
+            "brand", //toDo
+            "category",
+            "description",
+            "url",
+            "identifier",
+            "review", //toDo
+            "price"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HProductExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"product";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode product = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(product, RDF.TYPE, vProduct.product);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, product);
+        addPhoto(fragment, product);
+        addCategories(fragment, product);
+        addDescription(fragment, product);
+        addURLs(fragment, product);
+        addIdentifiers(fragment, product);
+        addPrice(fragment, product);
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode product, String fieldClass,
+                                      URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), product, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode product) {
+        mapFieldWithProperty(fragment, product, Microformats2Prefixes.PROPERTY_PREFIX +
+                productFields[0], vProduct.name);
+    }
+
+    private void addPhoto(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] photos = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[1]);
+        for(HTMLDocument.TextField photo : photos) {
+            addURIProperty(product, vProduct.photo, fragment.resolveURI(photo.value()));
+        }
+    }
+
+    private void addCategories(HTMLDocument fragment, BNode product) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + productFields[3]);
+        for(HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), product, vProduct.category, category.value()
+            );
+        }
+    }
+
+    private void addDescription(HTMLDocument fragment, BNode product) {
+        mapFieldWithProperty(fragment, product, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX +
+                productFields[4], vProduct.description);
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[5]);
+        for(HTMLDocument.TextField url : urls) {
+            addURIProperty(product, vProduct.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    private void addIdentifiers(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] identifiers = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[6]);
+        for(HTMLDocument.TextField identifier :identifiers) {
+            addURIProperty(product, vProduct.identifier, fragment.resolveURI(identifier.value()));
+        }
+    }
+
+    private void addPrice(HTMLDocument fragment, BNode product) {
+        final HTMLDocument.TextField price = fragment.getSingularTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + productFields[8]);
+        if(price.source()==null)
+            return;
+        Node attribute = price.source().getAttributes().getNamedItem("value");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    price.source(),
+                    product, vProduct.price, price.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    price.source(),
+                    product, vProduct.price, attribute.getNodeValue()
+            );
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
new file mode 100644
index 0000000..f4b65d9
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import java.util.Arrays;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HProductExtractorFactory extends SimpleExtractorFactory<HProductExtractor> implements
+        ExtractorFactory<HProductExtractor> {
+    // Extractor name and prefix subset handed to the superclass constructor.
+    public static final String NAME = "html-mf2-h-product";
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hproduct");
+
+    // Must stay below PREFIXES: the constructor reads it during static initialisation.
+    private static final ExtractorDescription descriptionInstance = new HProductExtractorFactory();
+
+    /** Passes the name, prefixes, two MIME-type strings and an example file name to the superclass. */
+    public HProductExtractorFactory() {
+        super(NAME, PREFIXES, Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-product.html");
+    }
+
+    /** Returns a new {@link HProductExtractor}. */
+    @Override
+    public HProductExtractor createExtractor() {
+        return new HProductExtractor();
+    }
+
+    /** Returns the factory instance created once during class initialisation. */
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
index 58516ec..34e3975 100644
--- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
+++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
@@ -32,6 +32,8 @@ ex=http://example.com/ns#
 wo=http://purl.org/ontology/wo/
 skos=http://www.w3.org/2004/02/skos/core#
 hrecipe=http://sindice.com/hrecipe/
+hevent=http://sindice.com/hevent/
+hproduct=http://sindice.com/hproduct/
 sindice=http://vocab.sindice.net/
 og=http://opengraphprotocol.org/schema/
 fb=http://www.facebook.com/2008/fbml#

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
new file mode 100644
index 0000000..6c13909
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HEventExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HEventExtractorFactory();
+    }
+    /** Extracts the h-event fixture; expects a non-empty model and a statement-size check of 9. */
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-event/h-event-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 9);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
new file mode 100644
index 0000000..3b46a7a
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HProductExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HProductExtractorFactory();
+    }
+    /** Extracts the h-product fixture; expects a non-empty model and a statement-size check of 11. */
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-product/h-product-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 11);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-event/h-event-test.html b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
new file mode 100644
index 0000000..b8af9de
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
@@ -0,0 +1,36 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+    <div class="h-event">
+        <h1 class="p-name">Microformats Meetup</h1>
+        <a class="u-url" href="http://microformats.org/meetup/">Official event web site</a>
+        <p>From 
+            <time class="dt-start" datetime="2013-06-30 12:00">30<sup>th</sup> June 2013, 12:00</time>
+            to <time class="dt-end" datetime="2013-06-30 18:00">18:00</time>
+            at <span class="p-location">Some bar in SF</span></p>
+        <p class="p-summary">Get together and discuss all things microformats-related.</p>
+        <p class="p-description">This <span class="p-category">technical meetup</span> is hosted in aid of discussion related to new draft specification of microformats 2</p>
+    </div>
+
+</body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-product/h-product-test.html b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
new file mode 100644
index 0000000..08ead4f
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
@@ -0,0 +1,36 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+   <div class="h-product">
+      <h1 class="p-name">Microformats For Dummies</h1>
+      <img class="u-photo" src="http://example.org/mfd.png" alt="" />
+      <div class="e-description">
+         <p>Want to get started using microformats, but intimidated by hyphens and mediawiki? This <span class="p-category">book</span>
+         contains everything you need to know!</p>
+      </div>
+         <p>Yours today for only <data class="p-price" value="20.00">$20.00</data>
+         from ACME Publishing inc.</p> 
+   </div>
+
+</body>
+
+</html>