You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2015/09/10 09:50:49 UTC
[05/13] any23 git commit: adding HEvent and HProduct Extractors
adding HEvent and HProduct Extractors
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/cc0dfbe8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/cc0dfbe8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/cc0dfbe8
Branch: refs/heads/master
Commit: cc0dfbe8127a00fa712c7d2df6785a73c290feae
Parents: 1616c17
Author: Nisala <ni...@gmail.com>
Authored: Mon Jul 20 01:12:27 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Mon Jul 20 01:12:27 2015 +0530
----------------------------------------------------------------------
.../java/org/apache/any23/vocab/HEvent.java | 57 ++++++
.../java/org/apache/any23/vocab/HProduct.java | 58 ++++++
.../html/microformats2/HEventExtractor.java | 195 +++++++++++++++++++
.../microformats2/HEventExtractorFactory.java | 57 ++++++
.../html/microformats2/HProductExtractor.java | 153 +++++++++++++++
.../microformats2/HProductExtractorFactory.java | 56 ++++++
.../apache/any23/prefixes/prefixes.properties | 2 +
.../html/microformats2/HEventExtractorTest.java | 37 ++++
.../microformats2/HProductExtractorTest.java | 37 ++++
.../microformats2/h-event/h-event-test.html | 36 ++++
.../microformats2/h-product/h-product-test.html | 36 ++++
11 files changed, 724 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HEvent.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HEvent.java b/api/src/main/java/org/apache/any23/vocab/HEvent.java
new file mode 100644
index 0000000..b936c3e
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HEvent.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-event">h-event</a> microformat.
+ *
+ * <p>The class is a singleton: the shared instance is obtained through
+ * {@link #getInstance()}. Every term is created under the {@link #NS} namespace.</p>
+ *
+ * @author Nisala Nirmana
+ */
+public class HEvent extends Vocabulary {
+
+    /** Namespace under which every h-event term is created. */
+    public static final String NS = SINDICE.NS + "hevent/";
+
+    // Built eagerly during class initialization (and declared after NS, which the
+    // constructor reads) so that concurrent callers of getInstance() can never
+    // observe two different vocabulary instances.
+    private static final HEvent instance = new HEvent();
+
+    /**
+     * Returns the shared vocabulary instance.
+     *
+     * @return the singleton {@link HEvent} vocabulary.
+     */
+    public static HEvent getInstance() {
+        return instance;
+    }
+
+    /** Class term <code>Event</code>. */
+    public final URI event = createClass(NS, "Event");
+
+    // Property terms; final because the vocabulary instance is shared and its terms
+    // must not be reassigned by callers.
+    public final URI name        = createProperty(NS, "name");
+    public final URI summary     = createProperty(NS, "summary");
+    public final URI start       = createProperty(NS, "start");
+    public final URI end         = createProperty(NS, "end");
+    public final URI duration    = createProperty(NS, "duration");
+    public final URI description = createProperty(NS, "description");
+    public final URI url         = createProperty(NS, "url");
+    public final URI category    = createProperty(NS, "category");
+    public final URI location    = createProperty(NS, "location");
+    public final URI attendee    = createProperty(NS, "attendee");
+
+    /** Private: instances are only handed out by {@link #getInstance()}. */
+    private HEvent() {
+        super(NS);
+    }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HProduct.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HProduct.java b/api/src/main/java/org/apache/any23/vocab/HProduct.java
new file mode 100644
index 0000000..9630db3
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HProduct.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-product">h-product</a> microformat.
+ *
+ * <p>The class is a singleton: the shared instance is obtained through
+ * {@link #getInstance()}. Every term is created under the {@link #NS} namespace.</p>
+ *
+ * @author Nisala Nirmana
+ */
+public class HProduct extends Vocabulary {
+
+    /** Namespace under which every h-product term is created. */
+    public static final String NS = SINDICE.NS + "hproduct/";
+
+    // Built eagerly during class initialization (and declared after NS, which the
+    // constructor reads) so that concurrent callers of getInstance() can never
+    // observe two different vocabulary instances.
+    private static final HProduct instance = new HProduct();
+
+    /**
+     * Returns the shared vocabulary instance.
+     *
+     * @return the singleton {@link HProduct} vocabulary.
+     */
+    public static HProduct getInstance() {
+        return instance;
+    }
+
+    /** Class term <code>Product</code>. */
+    public final URI product = createClass(NS, "Product");
+
+    // Property terms; final because the vocabulary instance is shared and its terms
+    // must not be reassigned by callers.
+    public final URI name        = createProperty(NS, "name");
+    public final URI photo       = createProperty(NS, "photo");
+    public final URI brand       = createProperty(NS, "brand");
+    public final URI category    = createProperty(NS, "category");
+    public final URI description = createProperty(NS, "description");
+    public final URI url         = createProperty(NS, "url");
+    public final URI identifier  = createProperty(NS, "identifier");
+    public final URI price       = createProperty(NS, "price");
+    public final URI review      = createProperty(NS, "review");
+
+    /** Private: instances are only handed out by {@link #getInstance()}. */
+    private HProduct() {
+        super(NS);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
new file mode 100644
index 0000000..8ce70a6
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.vocab.HEvent;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+import static org.apache.any23.extractor.html.HTMLDocument.TextField;
+
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-event">h-event</a>
+ * microformat.
+ *
+ * <p>For every node matched through {@link #getBaseClassName()} the extractor types the
+ * node's blank node as {@link HEvent#event} and then maps the name, summary, start, end,
+ * duration, description, URL, category and location fields found in that node.</p>
+ *
+ * @author Nisala Nirmana
+ */
+public class HEventExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HEvent vEvent = HEvent.getInstance();
+
+    // Property names without their microformats2 prefix; the add* helpers below address
+    // them by index. Shared and never modified, hence static final.
+    private static final String[] eventFields = {
+            "name",
+            "summary",
+            "start",
+            "end",
+            "duration",
+            "description",
+            "url",
+            "category",
+            "location", // TODO: currently mapped as a plain text value only
+            "attendee"  // TODO: declared but not extracted by extractEntity yet
+    };
+
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HEventExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX + "event";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    /**
+     * Maps a single h-event node.
+     *
+     * @param node the node carrying the h-event root class.
+     * @param out the extraction result; not used directly by this method.
+     * @return always <code>true</code>.
+     * @throws ExtractionException if one of the event URLs cannot be resolved.
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode event = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(event, RDF.TYPE, vEvent.event);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, event);
+        addSummary(fragment, event);
+        addStart(fragment, event);
+        addEnd(fragment, event);
+        addDuration(fragment, event);
+        addDescription(fragment, event);
+        addURLs(fragment, event);
+        addCategories(fragment, event);
+        addLocation(fragment, event);
+
+        return true;
+    }
+
+    /**
+     * Maps the singular text field with the given class name to the given property.
+     *
+     * @param fragment the document fragment rooted at the event node.
+     * @param event the blank node describing the event.
+     * @param fieldClass the complete (prefixed) class name to look for.
+     * @param property the property receiving the text value.
+     */
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode event, String fieldClass,
+                                      URI property) {
+        final TextField field = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(field.source(), event, property, field.value());
+    }
+
+    /**
+     * Maps a <code>dt-*</code> field to the given property. The value of the
+     * <code>datetime</code> attribute of the source element is preferred; the text
+     * value of the field is used when the attribute is missing. Nothing is added when
+     * the field has no source node.
+     *
+     * @param fragment the document fragment rooted at the event node.
+     * @param event the blank node describing the event.
+     * @param fieldName the property name without prefix, e.g. <code>start</code>.
+     * @param property the property receiving the value.
+     */
+    private void mapDateTimeFieldWithProperty(HTMLDocument fragment, BNode event,
+                                              String fieldName, URI property) {
+        final TextField field = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + fieldName);
+        final Node source = field.source();
+        if (source == null) {
+            return;
+        }
+        final Node datetime = source.getAttributes().getNamedItem("datetime");
+        final String value = (datetime == null) ? field.value() : datetime.getNodeValue();
+        conditionallyAddStringProperty(source, event, property, value);
+    }
+
+    private void addName(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[0], vEvent.name);
+    }
+
+    private void addSummary(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[1], vEvent.summary);
+    }
+
+    private void addStart(HTMLDocument fragment, BNode event) {
+        mapDateTimeFieldWithProperty(fragment, event, eventFields[2], vEvent.start);
+    }
+
+    private void addEnd(HTMLDocument fragment, BNode event) {
+        mapDateTimeFieldWithProperty(fragment, event, eventFields[3], vEvent.end);
+    }
+
+    private void addDuration(HTMLDocument fragment, BNode event) {
+        mapDateTimeFieldWithProperty(fragment, event, eventFields[4], vEvent.duration);
+    }
+
+    private void addDescription(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[5], vEvent.description);
+    }
+
+    // Every u-url value is resolved against the fragment and added as a URI property.
+    private void addURLs(HTMLDocument fragment, BNode event) throws ExtractionException {
+        final TextField[] urls = fragment.getPluralUrlField(
+                Microformats2Prefixes.URL_PROPERTY_PREFIX + eventFields[6]);
+        for (TextField url : urls) {
+            addURIProperty(event, vEvent.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    // Categories are plural: one string property is added per p-category field.
+    private void addCategories(HTMLDocument fragment, BNode event) {
+        final TextField[] categories = fragment.getPluralTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + eventFields[7]);
+        for (TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), event, vEvent.category, category.value()
+            );
+        }
+    }
+
+    private void addLocation(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[8], vEvent.location);
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
new file mode 100644
index 0000000..602b044
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * Factory that creates {@link HEventExtractor} instances. A shared instance of this
+ * factory is also handed out as the extractor description, see
+ * {@link #getDescriptionInstance()}.
+ *
+ * @author Peter Ansell p_ansell@yahoo.com
+ *
+ */
+public class HEventExtractorFactory extends SimpleExtractorFactory<HEventExtractor> implements
+ ExtractorFactory<HEventExtractor> {
+
+ public static final String NAME = "html-mf2-h-event"; // first argument handed to the super constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hevent"); // subset restricted to the "rdf" and "hevent" prefixes
+
+ private static final ExtractorDescription descriptionInstance = new HEventExtractorFactory(); // keep below PREFIXES: the constructor reads it during static initialization
+
+ public HEventExtractorFactory() {
+ super(
+ HEventExtractorFactory.NAME,
+ HEventExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // presumably the supported MIME types -- confirm against SimpleExtractorFactory
+ "example-mf2-h-event.html"); // presumably the name of an example input resource -- confirm
+ }
+
+ @Override
+ public HEventExtractor createExtractor() {
+ return new HEventExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // the shared factory instance created above
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
new file mode 100644
index 0000000..0e93935
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.vocab.HProduct;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-product">h-product</a>
+ * microformat.
+ *
+ * <p>Each matched node is typed as {@link HProduct#product}; afterwards its name, photos,
+ * categories, description, URLs, identifiers and price are mapped, in that order.</p>
+ *
+ * @author Nisala Nirmana
+ */
+public class HProductExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HProduct vProduct = HProduct.getInstance();
+
+    // Property names without prefix, addressed by index from the helpers below.
+    private static final String[] productFields = {
+            "name",
+            "photo",
+            "brand", //toDo
+            "category",
+            "description",
+            "url",
+            "identifier",
+            "review", //toDo
+            "price"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HProductExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX + "product";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    /**
+     * Maps a single h-product node.
+     *
+     * @param node the node carrying the h-product root class.
+     * @param out the extraction result; not used directly by this method.
+     * @return always <code>true</code>.
+     * @throws ExtractionException if a photo, URL or identifier cannot be resolved.
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode product = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(product, RDF.TYPE, vProduct.product);
+
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, product);
+        addPhoto(fragment, product);
+        addCategories(fragment, product);
+        addDescription(fragment, product);
+        addURLs(fragment, product);
+        addIdentifiers(fragment, product);
+        addPrice(fragment, product);
+        return true;
+    }
+
+    // Maps the singular text field with the given (prefixed) class name to a property.
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode product, String fieldClass,
+                                      URI property) {
+        final HTMLDocument.TextField field = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(field.source(), product, property, field.value());
+    }
+
+    // Resolves every value of the plural URL field and adds it as a URI property.
+    private void mapUrlFieldWithProperty(HTMLDocument fragment, BNode product, String fieldClass,
+                                         URI property) throws ExtractionException {
+        final HTMLDocument.TextField[] values = fragment.getPluralUrlField(fieldClass);
+        for (int i = 0; i < values.length; i++) {
+            addURIProperty(product, property, fragment.resolveURI(values[i].value()));
+        }
+    }
+
+    private void addName(HTMLDocument fragment, BNode product) {
+        final String nameClass = Microformats2Prefixes.PROPERTY_PREFIX + productFields[0];
+        mapFieldWithProperty(fragment, product, nameClass, vProduct.name);
+    }
+
+    private void addPhoto(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final String photoClass = Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[1];
+        mapUrlFieldWithProperty(fragment, product, photoClass, vProduct.photo);
+    }
+
+    // Categories are plural: one string property per matching field.
+    private void addCategories(HTMLDocument fragment, BNode product) {
+        final String categoryClass = Microformats2Prefixes.PROPERTY_PREFIX + productFields[3];
+        final HTMLDocument.TextField[] found = fragment.getPluralTextField(categoryClass);
+        for (int i = 0; i < found.length; i++) {
+            final HTMLDocument.TextField current = found[i];
+            conditionallyAddStringProperty(
+                    current.source(), product, vProduct.category, current.value());
+        }
+    }
+
+    private void addDescription(HTMLDocument fragment, BNode product) {
+        final String descriptionClass =
+                Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX + productFields[4];
+        mapFieldWithProperty(fragment, product, descriptionClass, vProduct.description);
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final String urlClass = Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[5];
+        mapUrlFieldWithProperty(fragment, product, urlClass, vProduct.url);
+    }
+
+    private void addIdentifiers(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final String identifierClass = Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[6];
+        mapUrlFieldWithProperty(fragment, product, identifierClass, vProduct.identifier);
+    }
+
+    // The "value" attribute of the source element wins over the text of the field.
+    private void addPrice(HTMLDocument fragment, BNode product) {
+        final HTMLDocument.TextField priceField = fragment.getSingularTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + productFields[8]);
+        final Node source = priceField.source();
+        if (source == null) {
+            return;
+        }
+        final Node valueAttribute = source.getAttributes().getNamedItem("value");
+        final String amount =
+                (valueAttribute != null) ? valueAttribute.getNodeValue() : priceField.value();
+        conditionallyAddStringProperty(source, product, vProduct.price, amount);
+    }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
new file mode 100644
index 0000000..f4b65d9
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import java.util.Arrays;
+
+/**
+ * Factory that creates {@link HProductExtractor} instances. A shared instance of this
+ * factory is also handed out as the extractor description, see
+ * {@link #getDescriptionInstance()}.
+ *
+ * @author Nisala Nirmana
+ *
+ */
+public class HProductExtractorFactory extends SimpleExtractorFactory<HProductExtractor> implements
+ ExtractorFactory<HProductExtractor> {
+
+ public static final String NAME = "html-mf2-h-product"; // first argument handed to the super constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hproduct"); // subset restricted to the "rdf" and "hproduct" prefixes
+
+ private static final ExtractorDescription descriptionInstance = new HProductExtractorFactory(); // keep below PREFIXES: the constructor reads it during static initialization
+
+ public HProductExtractorFactory() {
+ super(
+ HProductExtractorFactory.NAME,
+ HProductExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // presumably the supported MIME types -- confirm against SimpleExtractorFactory
+ "example-mf2-h-product.html"); // presumably the name of an example input resource -- confirm
+ }
+
+ @Override
+ public HProductExtractor createExtractor() {
+ return new HProductExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // the shared factory instance created above
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
index 58516ec..34e3975 100644
--- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
+++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
@@ -32,6 +32,8 @@ ex=http://example.com/ns#
wo=http://purl.org/ontology/wo/
skos=http://www.w3.org/2004/02/skos/core#
hrecipe=http://sindice.com/hrecipe/
+hevent=http://sindice.com/hevent/
+hproduct=http://sindice.com/hproduct/
sindice=http://vocab.sindice.net/
og=http://opengraphprotocol.org/schema/
fb=http://www.facebook.com/2008/fbml#
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
new file mode 100644
index 0000000..6c13909
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HEventExtractorTest extends AbstractExtractorTestCase { // runs the h-event extractor against the h-event test page
+ protected ExtractorFactory<?> getExtractorFactory() { // supplies the factory of the extractor under test
+ return new HEventExtractorFactory();
+ }
+
+ @Test
+ public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+ assertExtract("/microformats2/h-event/h-event-test.html"); // fixture holding a single h-event block
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 9); // presumably null acts as a wildcard, i.e. 9 statements overall -- confirm against AbstractExtractorTestCase
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
new file mode 100644
index 0000000..3b46a7a
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HProductExtractorTest extends AbstractExtractorTestCase { // runs the h-product extractor against the h-product test page
+ protected ExtractorFactory<?> getExtractorFactory() { // supplies the factory of the extractor under test
+ return new HProductExtractorFactory();
+ }
+
+ @Test
+ public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+ assertExtract("/microformats2/h-product/h-product-test.html"); // fixture holding a single h-product block
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 11); // presumably null acts as a wildcard, i.e. 11 statements overall -- confirm against AbstractExtractorTestCase
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-event/h-event-test.html b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
new file mode 100644
index 0000000..b8af9de
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
@@ -0,0 +1,36 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+ <!-- Microformats 2 -->
+
+ <div class="h-event">
+ <h1 class="p-name">Microformats Meetup</h1>
+ <a class="u-url" href="http://microformats.org/meetup/">Official event web site</a>
+ <p>From
+ <time class="dt-start" datetime="2013-06-30 12:00">30<sup>th</sup> June 2013, 12:00</time>
+ to <time class="dt-end" datetime="2013-06-30 18:00">18:00</time>
+ at <span class="p-location">Some bar in SF</span></p>
+ <p class="p-summary">Get together and discuss all things microformats-related.</p>
+ <p class="p-description">This <span class="p-category">technical meetup</span> is hosted in aid of discussion related to new draft specification of microformats 2</p>
+ </div>
+
+</body>
+
+</html>
http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-product/h-product-test.html b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
new file mode 100644
index 0000000..08ead4f
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
@@ -0,0 +1,36 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+ <!-- Microformats 2 -->
+
+ <div class="h-product">
+ <h1 class="p-name">Microformats For Dummies</h1>
+ <img class="u-photo" src="http://example.org/mfd.png" alt="" />
+ <div class="e-description">
+ <p>Want to get started using microformats, but intimidated by hyphens and mediawiki? This <span class="p-category">book</span>
+ contains everything you need to know!</p>
+ </div>
+ <p>Yours today for only <data class="p-price" value="20.00">$20.00</data>
+ from ACME Publishing inc.</p>
+ </div>
+
+</body>
+
+</html>