You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/02/22 01:40:06 UTC
svn commit: r1448871 - in /any23/trunk:
api/src/main/java/org/apache/any23/vocab/
core/src/main/java/org/apache/any23/extractor/html/
core/src/main/resources/org/apache/any23/extractor/html/
Author: ansell
Date: Fri Feb 22 00:40:06 2013
New Revision: 1448871
URL: http://svn.apache.org/r1448871
Log:
ANY23-2 : Add HReviewAggregate vocabulary and extractor
Added:
any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html
Added: any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java (added)
+++ any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary holding the terms used to describe the
+ * <a href="http://microformats.org/wiki/hreview-aggregate">hReview-aggregate</a>
+ * microformat.
+ *
+ * <p>The class is a singleton: the constructor is private and the shared
+ * instance is obtained through {@link #getInstance()}.</p>
+ */
+public class ReviewAggregate extends Vocabulary {
+
+    /**
+     * The namespace of the vocabulary as a string.
+     */
+    public static final String NS = "http://purl.org/stuff/revagg#";
+
+    /**
+     * The shared instance. It is created during class initialization, which
+     * the JVM performs at most once, so concurrent first callers of
+     * {@link #getInstance()} can no longer end up with different instances
+     * (the previous unsynchronized "check for null, then create" allowed that).
+     */
+    private static final ReviewAggregate instance = new ReviewAggregate();
+
+    /**
+     * Returns the shared vocabulary instance.
+     *
+     * @return the singleton {@code ReviewAggregate} vocabulary, never {@code null}.
+     */
+    public static ReviewAggregate getInstance() {
+        return instance;
+    }
+
+    /**
+     * The namespace of the vocabulary as a URI.
+     */
+    public final URI NAMESPACE = createURI(NS);
+
+    /**
+     * Total number of users who rated the item (hReview-aggregate
+     * <code>votes</code>).
+     */
+    public final URI votes = createProperty("votes");
+
+    /**
+     * Total number of reviews that were aggregated (hReview-aggregate
+     * <code>count</code>).
+     */
+    public final URI count = createProperty("count");
+
+    /**
+     * Average of the aggregated ratings (hReview-aggregate
+     * <code>average</code>); optional.
+     */
+    public final URI average = createProperty("average");
+
+    /**
+     * Lowest value of the rating scale (hReview-aggregate <code>worst</code>).
+     */
+    public final URI worst = createProperty("worst");
+
+    /**
+     * Highest value of the rating scale (hReview-aggregate <code>best</code>).
+     */
+    public final URI best = createProperty("best");
+
+    /**
+     * An aggregate review of a work.
+     *
+     * <p>NOTE(review): this term is registered through
+     * {@code createProperty} although the extractor writes it as the object
+     * of an {@code rdf:type} triple; confirm whether a class registration was
+     * intended.</p>
+     */
+    public final URI ReviewAggregate = createProperty("ReviewAggregate");
+
+    /**
+     * Creates a property URI inside this vocabulary's namespace.
+     *
+     * @param localName the local name appended to {@link #NS}.
+     * @return the URI produced by the inherited two-argument
+     *         {@code createProperty}.
+     */
+    private URI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /**
+     * Private to enforce the singleton; use {@link #getInstance()}.
+     */
+    private ReviewAggregate() {
+        super(NS);
+    }
+}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html;
+
+import java.util.List;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.HTMLDocument.TextField;
+import org.apache.any23.vocab.Review;
+import org.apache.any23.vocab.ReviewAggregate;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+
+/**
+ * Extractor for the <a
+ * href="http://microformats.org/wiki/hreview-aggregate">hReview-aggregate</a>
+ * microformat.
+ *
+ * @author Sebastien Richard
+ */
+public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {
+ private static final Review vREVIEW = Review.getInstance();
+ private static final ReviewAggregate vREVIEWAGG = ReviewAggregate
+ .getInstance();
+ private static final VCard vVCARD = VCard.getInstance();
+
+ @Override
+ public ExtractorDescription getDescription() {
+ return HReviewAggregateExtractorFactory.getDescriptionInstance();
+ }
+
+ @Override
+ protected String getBaseClassName() {
+ return "hreview-aggregate";
+ }
+
+ @Override
+ protected void resetExtractor() {
+ // Empty.
+ }
+
+ @Override
+ protected boolean extractEntity(Node node, ExtractionResult out)
+ throws ExtractionException {
+ BNode rev = getBlankNodeFor(node);
+ out.writeTriple(rev, RDF.TYPE, vREVIEWAGG.ReviewAggregate);
+ final HTMLDocument fragment = new HTMLDocument(node);
+ addRating(fragment, rev);
+ addWorst(fragment, rev);
+ addBest(fragment, rev);
+ addAverage(fragment, rev);
+ addSummary(fragment, rev);
+ addType(fragment, rev);
+ addItem(fragment, rev);
+ addCount(fragment, rev);
+ addVotes(fragment, rev);
+
+ final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
+ tser.addResourceRoot(DomUtils.getXPathListForNode(node), rev,
+ this.getClass());
+
+ return true;
+ }
+
+ private void addType(HTMLDocument doc, Resource rev) {
+ TextField value = doc.getSingularTextField("type");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEW.type,
+ value.value());
+ }
+
+ private void addItem(HTMLDocument root, BNode rev)
+ throws ExtractionException {
+ List<Node> nodes = root.findAllByClassName("item");
+ for (Node node : nodes) {
+ Resource item = findDummy(new HTMLDocument(node));
+ addBNodeProperty(node, item, vREVIEW.hasReview, rev);
+ }
+ }
+
+ private Resource findDummy(HTMLDocument item) throws ExtractionException {
+ Resource blank = getBlankNodeFor(item.getDocument());
+ TextField val = item.getSingularTextField("fn");
+ conditionallyAddStringProperty(val.source(), blank, vVCARD.fn,
+ val.value());
+ final TextField url = item.getSingularUrlField("url");
+ conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument()
+ .resolveURI(url.value()));
+ TextField pics[] = item.getPluralUrlField("photo");
+ for (TextField pic : pics) {
+ addURIProperty(blank, vVCARD.photo,
+ getHTMLDocument().resolveURI(pic.value()));
+ }
+ return blank;
+ }
+
+ private void addRating(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("rating");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEW.rating,
+ value.value());
+ }
+
+ private void addWorst(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("worst");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.worst,
+ value.value());
+ }
+
+ private void addBest(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("best");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.best,
+ value.value());
+ }
+
+ private void addAverage(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("average");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.average,
+ value.value());
+ }
+
+ private void addCount(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("count");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.count,
+ value.value());
+ }
+
+ private void addVotes(HTMLDocument doc, Resource rev) {
+ HTMLDocument.TextField value = doc.getSingularTextField("votes");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEWAGG.votes,
+ value.value());
+ }
+
+ private void addSummary(HTMLDocument doc, Resource rev) {
+ TextField value = doc.getSingularTextField("summary");
+ conditionallyAddStringProperty(value.source(), rev, vREVIEW.title,
+ value.value());
+ }
+}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ *
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HReviewAggregateExtractorFactory extends SimpleExtractorFactory<HReviewAggregateExtractor> implements
+ ExtractorFactory<HReviewAggregateExtractor> {
+
+ public static final String NAME = "html-mf-hreview-aggregate";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard", "rev");
+
+ private static final ExtractorDescription descriptionInstance = new HReviewAggregateExtractorFactory();
+
+ public HReviewAggregateExtractorFactory() {
+ super(
+ HReviewAggregateExtractorFactory.NAME,
+ HReviewAggregateExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1",
+ "application/xhtml+xml;q=0.1"),
+ "example-mf-hreview-aggregate.html");
+ }
+
+ @Override
+ public HReviewAggregateExtractor createExtractor() {
+ return new HReviewAggregateExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Added: any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html (added)
+++ any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html Fri Feb 22 00:40:06 2013
@@ -0,0 +1,26 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<div class="hreview-aggregate">
+ <h4 class="item"><span class="fn">Crepes on Cole</span></h4>
+ <span>
+ <span class="rating">4.5</span> out of 5 based on <span class="votes">12</span> ratings.
+ </span>
+
+ <span>
+ <span class="count">5</span> user reviews.
+ </span>
+</div>
\ No newline at end of file