You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/02/22 01:40:06 UTC

svn commit: r1448871 - in /any23/trunk: api/src/main/java/org/apache/any23/vocab/ core/src/main/java/org/apache/any23/extractor/html/ core/src/main/resources/org/apache/any23/extractor/html/

Author: ansell
Date: Fri Feb 22 00:40:06 2013
New Revision: 1448871

URL: http://svn.apache.org/r1448871
Log:
ANY23-2 : Add HReviewAggregate vocabulary and extractor

Added:
    any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java
    any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
    any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
    any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html

Added: any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java (added)
+++ any23/trunk/api/src/main/java/org/apache/any23/vocab/ReviewAggregate.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * RDF vocabulary for aggregated reviews (hReview-aggregate data), rooted at
+ * the {@link #NS} namespace. Obtain the shared instance via {@link #getInstance()}.
+ */
+public class ReviewAggregate extends Vocabulary {
+    private static ReviewAggregate instance;
+
+    /**
+     * Returns the shared instance, creating it on first use. Synchronized so that
+     * threads racing on the first call cannot each construct their own instance.
+     */
+    public static synchronized ReviewAggregate getInstance() {
+        if (instance == null) {
+            instance = new ReviewAggregate();
+        }
+        return instance;
+    }
+
+    /** The namespace of the vocabulary as a string. */
+    public static final String NS = "http://purl.org/stuff/revagg#";
+
+    /** The namespace of the vocabulary as a URI. */
+    public final URI NAMESPACE = createURI(NS);
+
+    /** Number of ratings (votes) behind the aggregate (integer). */
+    public final URI votes = createProperty("votes");
+
+    /** Number of reviews that were aggregated (integer). */
+    public final URI count = createProperty("count");
+
+    /** Average rating over the aggregated reviews; optional. */
+    public final URI average = createProperty("average");
+
+    /** Lower bound of the rating scale (hReview-aggregate "worst"). */
+    public final URI worst = createProperty("worst");
+
+    /** Upper bound of the rating scale (hReview-aggregate "best"). */
+    public final URI best = createProperty("best");
+
+    // NOTE(review): this term names a class yet is registered via createProperty - confirm.
+    /** An aggregate review of a work. */
+    public final URI ReviewAggregate = createProperty("ReviewAggregate");
+
+    /** Creates the property {@code localName} within the {@link #NS} namespace. */
+    private URI createProperty(String localName) {
+        return createProperty(NS, localName);
+    }
+
+    /** Private: the only instance is the one handed out by {@link #getInstance()}. */
+    private ReviewAggregate() {
+        super(NS);
+    }
+}

Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html;
+
+import java.util.List;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.HTMLDocument.TextField;
+import org.apache.any23.vocab.Review;
+import org.apache.any23.vocab.ReviewAggregate;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+
+/**
+ * Extractor for the <a
+ * href="http://microformats.org/wiki/hreview-aggregate">hReview-aggregate</a>
+ * microformat.
+ * 
+ * @author Sebastien Richard
+ */
+public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor {
+    private static final Review vREVIEW = Review.getInstance();
+    private static final ReviewAggregate vREVIEWAGG = ReviewAggregate.getInstance();
+    private static final VCard vVCARD = VCard.getInstance();
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HReviewAggregateExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return "hreview-aggregate"; // class name carried by the microformat's root element
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Nothing to reset: this class declares no instance fields.
+    }
+
+    /**
+     * Types the blank node of {@code node} as a ReviewAggregate, adds its fields and
+     * registers it as a resource root; {@code out} must be a TagSoupExtractionResult.
+     */
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode aggregate = getBlankNodeFor(node);
+        out.writeTriple(aggregate, RDF.TYPE, vREVIEWAGG.ReviewAggregate);
+        final HTMLDocument scope = new HTMLDocument(node);
+        addRating(scope, aggregate);
+        addWorst(scope, aggregate);
+        addBest(scope, aggregate);
+        addAverage(scope, aggregate);
+        addSummary(scope, aggregate);
+        addType(scope, aggregate);
+        addItem(scope, aggregate);
+        addCount(scope, aggregate);
+        addVotes(scope, aggregate);
+
+        ((TagSoupExtractionResult) out).addResourceRoot(
+                DomUtils.getXPathListForNode(node), aggregate, getClass());
+        return true;
+    }
+
+    /** Maps the singular "type" field to {@code Review.type} (added conditionally). */
+    private void addType(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("type");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEW.type, field.value());
+    }
+
+    /** Links the blank node built for each "item" element to {@code rev} via {@code hasReview}. */
+    private void addItem(HTMLDocument root, BNode rev) throws ExtractionException {
+        final List<Node> itemNodes = root.findAllByClassName("item");
+        for (Node itemNode : itemNodes) {
+            final Resource reviewed = findDummy(new HTMLDocument(itemNode));
+            addBNodeProperty(itemNode, reviewed, vREVIEW.hasReview, rev);
+        }
+    }
+
+    /**
+     * Builds the blank node for a reviewed item and attaches its vCard {@code fn},
+     * {@code url} and each {@code photo}; URLs go through getHTMLDocument().resolveURI.
+     */
+    private Resource findDummy(HTMLDocument item) throws ExtractionException {
+        final Resource subject = getBlankNodeFor(item.getDocument());
+        final TextField name = item.getSingularTextField("fn");
+        conditionallyAddStringProperty(name.source(), subject, vVCARD.fn, name.value());
+        final TextField link = item.getSingularUrlField("url");
+        conditionallyAddResourceProperty(subject, vVCARD.url, getHTMLDocument().resolveURI(link.value()));
+        for (TextField photo : item.getPluralUrlField("photo")) {
+            addURIProperty(subject, vVCARD.photo, getHTMLDocument().resolveURI(photo.value()));
+        }
+        return subject;
+    }
+
+    /** Maps the singular "rating" field to {@code Review.rating} (added conditionally). */
+    private void addRating(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("rating");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEW.rating, field.value());
+    }
+
+    /** Maps the singular "worst" field to {@code ReviewAggregate.worst} (added conditionally). */
+    private void addWorst(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("worst");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEWAGG.worst, field.value());
+    }
+
+    /** Maps the singular "best" field to {@code ReviewAggregate.best} (added conditionally). */
+    private void addBest(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("best");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEWAGG.best, field.value());
+    }
+
+    /** Maps the singular "average" field to {@code ReviewAggregate.average} (added conditionally). */
+    private void addAverage(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("average");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEWAGG.average, field.value());
+    }
+
+    /** Maps the singular "count" field to {@code ReviewAggregate.count} (added conditionally). */
+    private void addCount(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("count");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEWAGG.count, field.value());
+    }
+
+    /** Maps the singular "votes" field to {@code ReviewAggregate.votes} (added conditionally). */
+    private void addVotes(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("votes");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEWAGG.votes, field.value());
+    }
+
+    /** Maps the singular "summary" field to {@code Review.title} (added conditionally). */
+    private void addSummary(HTMLDocument doc, Resource rev) {
+        final TextField field = doc.getSingularTextField("summary");
+        conditionallyAddStringProperty(field.source(), rev, vREVIEW.title, field.value());
+    }
+}

Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java Fri Feb 22 00:40:06 2013
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * 
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HReviewAggregateExtractorFactory extends SimpleExtractorFactory<HReviewAggregateExtractor> implements
+        ExtractorFactory<HReviewAggregateExtractor> {
+
+    public static final String NAME = "html-mf-hreview-aggregate";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard", "rev");
+
+    private static final ExtractorDescription descriptionInstance = new HReviewAggregateExtractorFactory();
+    
+    public HReviewAggregateExtractorFactory() {
+        super(
+                HReviewAggregateExtractorFactory.NAME, 
+                HReviewAggregateExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1",
+                        "application/xhtml+xml;q=0.1"),
+                        "example-mf-hreview-aggregate.html");
+    }
+    
+    @Override
+    public HReviewAggregateExtractor createExtractor() {
+        return new HReviewAggregateExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

Added: any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html?rev=1448871&view=auto
==============================================================================
--- any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html (added)
+++ any23/trunk/core/src/main/resources/org/apache/any23/extractor/html/example-mf-hreview-aggregate.html Fri Feb 22 00:40:06 2013
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<div class="hreview-aggregate">
+    <h4 class="item"><span class="fn">Crepes on Cole</span></h4>
+	 <span>
+	 	<span class="rating">4.5</span> out of 5 based on <span class="votes">12</span> ratings.
+	 </span>
+	 
+	 <span>
+	 	<span class="count">5</span> user reviews.
+	 </span>
+</div>
\ No newline at end of file