You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ja...@apache.org on 2013/02/21 16:30:45 UTC
[30/55] MARMOTTA-106: renamed sesame-rio modules
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParser.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParser.java b/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParser.java
deleted file mode 100644
index 38ce5cb..0000000
--- a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParser.java
+++ /dev/null
@@ -1,418 +0,0 @@
-/**
- * Copyright (C) 2013 Salzburg Research.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package at.newmedialab.sesame.rio.rss;
-
-import com.google.common.base.Preconditions;
-import com.sun.syndication.feed.WireFeed;
-import com.sun.syndication.feed.atom.*;
-import com.sun.syndication.feed.module.DCModule;
-import com.sun.syndication.feed.module.Module;
-import com.sun.syndication.feed.module.SyModule;
-import com.sun.syndication.io.FeedException;
-import com.sun.syndication.io.WireFeedInput;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.rometools.feed.module.content.ContentModule;
-import org.rometools.feed.module.georss.GeoRSSModule;
-import org.rometools.feed.module.mediarss.MediaEntryModule;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.InputSource;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
-
-/**
- * Parse Atom feed into RDF. Uses the following vocabularies:
- * <ul>
- * <li>dcterms for representing most metadata about feeds and entries</li>
- * <li>sioc for type information and relation between concepts</li>
- * <li>skos for representing categories associated with items or channels</li>
- * <li>media ontology for representing information from the mediarss extension</li>
- * <li>wgs84 geo ontology for representing information from the georss extension</li>
- * </ul>
- * <p/>
- * Author: Sebastian Schaffert
- */
-public class AtomParser extends FeedParserBase {
-
-
- private static Logger log = LoggerFactory.getLogger(AtomParser.class);
-
-
- /**
- * Creates a new AtomParser that will use a {@link org.openrdf.model.impl.ValueFactoryImpl} to
- * create RDF model objects. Delegates to the constructor taking a ValueFactory.
- */
- public AtomParser() {
- this(new ValueFactoryImpl());
- }
-
- /**
-  * Creates a new AtomParser that will use the supplied ValueFactory to
-  * create RDF model objects.
-  * <p/>
-  * The FeedParserBase constructor already stores the factory in the inherited
-  * {@code valueFactory} field, so it is not assigned a second time here.
-  *
-  * @param valueFactory A ValueFactory.
-  */
- public AtomParser(ValueFactory valueFactory) {
-     super(valueFactory);
- }
-
-
-
- /**
- * Gets the RDF format that this parser can parse.
- *
- * @return the constant {@code AtomFormat.FORMAT}
- */
- @Override
- public RDFFormat getRDFFormat() {
- return AtomFormat.FORMAT;
- }
-
-
- /**
-  * Parses Atom data from the supplied InputStream, using the supplied baseURI
-  * to resolve any relative URI references. The stream is read with ROME's
-  * WireFeedInput; the resulting feed is then translated by parseFeed.
-  *
-  * @param in      The InputStream from which to read the data.
-  * @param baseURI The URI associated with the data in the InputStream; must not be null.
-  * @throws java.io.IOException If an I/O error occurred while data was read from the InputStream.
-  * @throws org.openrdf.rio.RDFParseException
-  *                             If ROME cannot parse the data, or if the parsed
-  *                             feed is not an Atom feed.
-  * @throws org.openrdf.rio.RDFHandlerException
-  *                             If the configured statement handler has encountered an
-  *                             unrecoverable error.
-  */
- @Override
- public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
-     Preconditions.checkNotNull(baseURI);
-
-     setBaseURI(baseURI);
-
-     WireFeedInput input = new WireFeedInput();
-     try {
-         WireFeed feed = input.build(new InputSource(in));
-         if(!(feed instanceof Feed)) {
-             // ROME understood the data, but it is not an Atom Feed instance
-             throw new RDFParseException("data stream is not an Atom feed");
-         }
-         parseFeed((Feed) feed);
-     } catch (FeedException e) {
-         throw new RDFParseException(e);
-     }
- }
-
- /**
-  * Parses Atom data from the supplied Reader, using the supplied baseURI to
-  * resolve any relative URI references. The data is read with ROME's
-  * WireFeedInput; the resulting feed is then translated by parseFeed.
-  *
-  * @param reader  The Reader from which to read the data.
-  * @param baseURI The URI associated with the data in the Reader; must not be null.
-  * @throws java.io.IOException If an I/O error occurred while data was read from the Reader.
-  * @throws org.openrdf.rio.RDFParseException
-  *                             If ROME cannot parse the data, or if the parsed
-  *                             feed is not an Atom feed.
-  * @throws org.openrdf.rio.RDFHandlerException
-  *                             If the configured statement handler has encountered an
-  *                             unrecoverable error.
-  */
- @Override
- public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
-     Preconditions.checkNotNull(baseURI);
-
-     setBaseURI(baseURI);
-
-     WireFeedInput input = new WireFeedInput();
-     try {
-         WireFeed feed = input.build(reader);
-         if(!(feed instanceof Feed)) {
-             // ROME understood the data, but it is not an Atom Feed instance
-             throw new RDFParseException("data stream is not an Atom feed");
-         }
-         parseFeed((Feed) feed);
-     } catch (FeedException e) {
-         throw new RDFParseException(e);
-     }
- }
-
-
-
- /**
-  * Converts one Atom entry into RDF statements and passes them to the configured RDF handler.
-  * The entry becomes a sioc:Post identified by its id and is linked to the feed in both
-  * directions (sioc:container_of / sioc:has_container). An entry without an id is skipped with
-  * a warning, mirroring how parseFeed treats a feed without an id.
-  *
-  * @param entry  the ROME Atom entry to convert
-  * @param r_feed the resource of the feed that contains the entry
-  * @throws RDFParseException   propagated from the statement-creation helpers
-  * @throws RDFHandlerException propagated from the RDF handler
-  */
- private void parseFeedEntry(final Entry entry, final Resource r_feed) throws RDFParseException, RDFHandlerException {
-
-     final String entryURI = entry.getId();
-     if (entryURI == null) {
-         // without an id there is no subject URI to attach the statements to
-         log.warn("entry '{}' has no id to use as URI, skipping it", entry.getTitle());
-         return;
-     }
-
-     URI r_entry = createURI(entryURI);
-     URI rdf_type = createURI(NS_RDF + "type");
-
-     // add type sioc:Post
-     rdfHandler.handleStatement(createStatement(r_entry, rdf_type, createURI(NS_SIOC + "Post")));
-
-     // link feed and entry in both directions
-     rdfHandler.handleStatement(createStatement(r_feed, createURI(NS_SIOC + "container_of"), r_entry));
-     rdfHandler.handleStatement(createStatement(r_entry, createURI(NS_SIOC + "has_container"), r_feed));
-
-     // for each alternate link we create a sioc:link
-     for(Link link : entry.getAlternateLinks()) {
-         createUrlProperty(r_entry, NS_SIOC + "link", resolveURI(link.getHref()));
-     }
-
-     // add all authors as dc:creator
-     for(Person person : entry.getAuthors()) {
-         parsePerson(r_entry, person, "creator");
-     }
-
-     for(Object category : entry.getCategories()) {
-         parseCategory(r_entry, (Category) category);
-     }
-
-     // each content element is emitted as content:encoded / content:format and as dc:description
-     for(Content content : entry.getContents()) {
-         createStringProperty(r_entry, NS_RSS_CONTENT + "encoded", content.getValue());
-         createStringProperty(r_entry, NS_RSS_CONTENT + "format", content.getType());
-         createStringProperty(r_entry, NS_DC_TERMS + "description", content.getValue());
-     }
-
-     // add all contributors as dc:contributor
-     for(Person person : entry.getContributors()) {
-         parsePerson(r_entry, person, "contributor");
-     }
-
-     createDateProperty(r_entry, NS_DC_TERMS + "created", entry.getCreated());
-
-     // ignore foreign markup
-
-     createDateProperty(r_entry, NS_DC_TERMS + "issued", entry.getPublished());
-     createDateProperty(r_entry, NS_DC_TERMS + "modified", entry.getUpdated());
-
-     // the id is used as subject URI above; it is additionally recorded as dc:identifier
-     createStringProperty(r_entry, NS_DC_TERMS + "identifier", entryURI);
-
-     for(Object module : entry.getModules()) {
-         if(module instanceof DCModule) {
-             parseDCModule(r_entry, (DCModule) module);
-         } else if(module instanceof GeoRSSModule) {
-             parseGeoModule(r_entry, (GeoRSSModule) module);
-         } else if(module instanceof MediaEntryModule) {
-             parseMediaModule(r_entry, (MediaEntryModule) module);
-         } else if(module instanceof ContentModule) {
-             parseContentModule(r_entry, (ContentModule) module);
-         } else {
-             log.warn("module {} not supported yet", module.toString());
-         }
-
-         // TODO: add support for more modules!
-     }
-
-     // all remaining links are emitted as sioc:link as well
-     for(Link link : entry.getOtherLinks()) {
-         createUrlProperty(r_entry, NS_SIOC + "link", resolveURI(link.getHref()));
-     }
-
-     // copyright information
-     createStringProperty(r_entry, NS_DC_TERMS + "rights", entry.getRights());
-
-     // if the source is present, we link just to its id using dc:source and ignore the text
-     if(entry.getSource() != null) {
-         createUrlProperty(r_entry, NS_DC_TERMS + "source", entry.getSource().getId());
-     }
-
-     if(entry.getSummary() != null) {
-         createStringProperty(r_entry, NS_DC_TERMS + "abstract", entry.getSummary().getValue());
-     }
-
-     // title is dc:title
-     createStringProperty(r_entry, NS_DC_TERMS + "title", entry.getTitle());
-
-     log.debug("parsed Atom item {}", r_entry.stringValue());
- }
-
- /**
-  * Translates an Atom feed (ROME wire-feed representation) into RDF statements and passes them
-  * to the configured RDF handler. The feed becomes a sioc:Forum identified by its id; every
-  * entry is handled by parseFeedEntry. A feed without an id is logged as an error and nothing
-  * is emitted for it.
-  *
-  * @param feed the ROME Atom feed representation
-  * @throws RDFParseException   propagated from the statement-creation helpers
-  * @throws RDFHandlerException propagated from the RDF handler
-  */
- private void parseFeed(final Feed feed) throws RDFParseException, RDFHandlerException {
-     if (log.isInfoEnabled()) {
-         log.info("importing entries from {} feed '{}' found at '{}'",new Object[] {feed.getFeedType(),feed.getTitle(),feed.getId()});
-     }
-
-     final String feedUri = feed.getId();
-     if (feedUri == null) {
-         log.error("feed '{}' has neither uri nor link to reference", feed.getTitle());
-         return;
-     }
-
-     // we set some namespaces first
-     setNamespace(NS_DC_TERMS,"dcterms");
-     setNamespace(NS_RSS_SY,"sy");
-     setNamespace(NS_RSS_CONTENT,"content");
-     setNamespace(NS_SIOC,"sioc");
-
-     URI r_feed = createURI(feedUri);
-     URI rdf_type = createURI(NS_RDF + "type");
-
-     // add type sioc:Forum
-     rdfHandler.handleStatement(createStatement(r_feed, rdf_type, createURI(NS_SIOC + "Forum")));
-     createUrlProperty(r_feed, NS_SIOC + "feed", feedUri);
-
-     // for each alternate link we create a sioc:link
-     for(Link link : feed.getAlternateLinks()) {
-         createUrlProperty(r_feed, NS_SIOC + "link", resolveURI(link.getHref()));
-     }
-
-     // add all authors as dc:creator
-     for(Person person : feed.getAuthors()) {
-         parsePerson(r_feed, person, "creator");
-     }
-
-     // add all categories that are present
-     for(Object category : feed.getCategories()) {
-         parseCategory(r_feed, (Category) category);
-     }
-
-     // add all contributors as dc:contributor (previously the authors were iterated a second time)
-     for(Person person : feed.getContributors()) {
-         parsePerson(r_feed, person, "contributor");
-     }
-
-     // add dc:provenance naming the software that generated the feed; the generator is optional
-     if(feed.getGenerator() != null) {
-         createStringProperty(r_feed, NS_DC_TERMS + "provenance", feed.getGenerator().getValue());
-     }
-
-     // add foaf:thumbnail in case there is an icon
-     if(feed.getIcon() != null) {
-         createUrlProperty(r_feed, NS_FOAF + "thumbnail", resolveURI(feed.getIcon()));
-     }
-
-     // add all feed items
-     for(Entry item : feed.getEntries()) {
-         parseFeedEntry(item, r_feed);
-     }
-
-     // add dc:language for feed.getLanguage()
-     createStringProperty(r_feed, NS_DC_TERMS + "language", feed.getLanguage());
-
-     // add foaf:logo in case there is a logo
-     if(feed.getLogo() != null) {
-         createUrlProperty(r_feed, NS_FOAF + "logo", resolveURI(feed.getLogo()));
-     }
-
-     // only the syndication and Dublin Core modules are evaluated at feed level
-     for(Module module : feed.getModules()) {
-         if(module instanceof SyModule) {
-             SyModule syModule = (SyModule)module;
-             createStringProperty(r_feed, NS_RSS_SY + "updatePeriod", syModule.getUpdatePeriod());
-             createIntProperty(r_feed, NS_RSS_SY + "updateFrequency", syModule.getUpdateFrequency());
-             createDateProperty(r_feed, NS_RSS_SY + "updateBase", syModule.getUpdateBase());
-         } else if(module instanceof DCModule) {
-             parseDCModule(r_feed, (DCModule)module);
-         }
-     }
-
-     // all remaining links are emitted as sioc:link as well
-     for(Link link : feed.getOtherLinks()) {
-         createUrlProperty(r_feed, NS_SIOC + "link", resolveURI(link.getHref()));
-     }
-
-     // add dc:rights for feed.getRights()
-     createStringProperty(r_feed, NS_DC_TERMS + "rights", feed.getRights());
-
-     // add dc:description for the feed subtitle
-     if(feed.getSubtitle() != null) {
-         createStringProperty(r_feed, NS_DC_TERMS + "description", feed.getSubtitle().getValue());
-     }
-
-     createStringProperty(r_feed, NS_DC_TERMS + "title", feed.getTitle());
-
-     // the update date is used for both dc:created and dc:issued
-     createDateProperty(r_feed, NS_DC_TERMS + "created", feed.getUpdated());
-     createDateProperty(r_feed, NS_DC_TERMS + "issued", feed.getUpdated());
-
-     log.info("importing Atom feed finished successfully.");
- }
-
- protected void parseCategory(Resource resource, Category category) throws RDFHandlerException, RDFParseException {
- if(category.getTerm() == null) {
- return;
- }
-
- try {
- Resource skosConcept;
- if(category.getScheme() != null ) {
- // create a skos:Concept with the domain as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getTerm(), "UTF-8");
- String namespace = category.getScheme();
- skosConcept = createURI(namespace+(namespace.endsWith("/") || namespace.endsWith("#")?"":"/")+localName);
- } else {
- // create a skos:Concept with the baseUri as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getTerm(), "UTF-8");
- skosConcept = resolveURI(localName);
- }
- createUrlProperty(skosConcept,NS_RDF + "type", NS_SKOS+"Concept");
- if(category.getLabel() != null) {
- createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getLabel());
- } else {
- createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getTerm());
- }
- rdfHandler.handleStatement(createStatement(resource,createURI(NS_SIOC + "topic"),skosConcept));
- } catch (UnsupportedEncodingException e) {
- throw new RDFParseException(e);
- }
-
-
- // add category value as dc:subject
- if(category.getLabel() != null) {
- createStringProperty(resource, NS_DC_TERMS + "subject", category.getLabel());
- } else {
- createStringProperty(resource, NS_DC_TERMS + "subject", category.getTerm());
- }
-
- }
-
-
- protected void parsePerson(Resource r_entry, Person person, String relation) throws RDFParseException, RDFHandlerException {
- if("creator".equals(relation) && (person.getUri() != null || person.getEmail() != null)) {
- String personUri = person.getUri() != null ? person.getUri() : "mailto:"+person.getEmail();
- Resource r_person = createURI(personUri);
- createStringProperty(r_person, NS_FOAF + "name", person.getName());
- if(person.getEmail() != null) {
- createUrlProperty(r_person, NS_FOAF + "mbox", "mailto:"+person.getEmail());
- }
- createUrlProperty(r_person, NS_FOAF + "homepage", person.getUri());
-
- rdfHandler.handleStatement(createStatement(r_entry, createURI(NS_FOAF + "maker"), r_person));
- rdfHandler.handleStatement(createStatement(r_person, createURI(NS_FOAF + "made"), r_entry));
- }
- createStringProperty(r_entry,NS_DC_TERMS + relation,person.getName());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParserFactory.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParserFactory.java b/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParserFactory.java
deleted file mode 100644
index a2ff512..0000000
--- a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/AtomParserFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-/**
- * Copyright (C) 2013 Salzburg Research.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package at.newmedialab.sesame.rio.rss;
-
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.RDFParserFactory;
-
-/**
- * Sesame Rio parser factory for the Atom format: reports {@code AtomFormat.FORMAT}
- * as its format and creates {@link AtomParser} instances.
- * <p/>
- * Author: Sebastian Schaffert
- */
-public class AtomParserFactory implements RDFParserFactory {
-
- /**
- * Returns the RDF format for this factory.
- *
- * @return the constant {@code AtomFormat.FORMAT}
- */
- @Override
- public RDFFormat getRDFFormat() {
- return AtomFormat.FORMAT;
- }
-
- /**
- * Returns a RDFParser instance.
- *
- * @return a newly created {@link AtomParser}
- */
- @Override
- public RDFParser getParser() {
- return new AtomParser();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/FeedParserBase.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/FeedParserBase.java b/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/FeedParserBase.java
deleted file mode 100644
index 037a00d..0000000
--- a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/FeedParserBase.java
+++ /dev/null
@@ -1,315 +0,0 @@
-/**
- * Copyright (C) 2013 Salzburg Research.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package at.newmedialab.sesame.rio.rss;
-
-import com.sun.syndication.feed.module.DCModule;
-import com.sun.syndication.feed.module.DCSubject;
-import org.openrdf.model.Literal;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.helpers.RDFParserBase;
-import org.rometools.feed.module.content.ContentModule;
-import org.rometools.feed.module.georss.GeoRSSModule;
-import org.rometools.feed.module.mediarss.MediaEntryModule;
-import org.rometools.feed.module.mediarss.types.MediaContent;
-import org.rometools.feed.module.mediarss.types.Metadata;
-import org.rometools.feed.module.mediarss.types.UrlReference;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.xml.datatype.DatatypeConfigurationException;
-import javax.xml.datatype.DatatypeFactory;
-import javax.xml.datatype.XMLGregorianCalendar;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
-import java.util.Date;
-import java.util.GregorianCalendar;
-import java.util.TimeZone;
-
-/**
- * Common functionality for RSS and Atom feed parsing
- * <p/>
- * Author: Sebastian Schaffert
- */
-public abstract class FeedParserBase extends RDFParserBase {
- // Class-wide SLF4J logger; declared final because it is assigned exactly once.
- private static final Logger log = LoggerFactory.getLogger(FeedParserBase.class);
-
-
- // Namespace URIs of the vocabularies used when building property and type URIs
- protected static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
- protected static final String NS_DC = "http://purl.org/dc/elements/1.1/";
- protected static final String NS_DC_TERMS = "http://purl.org/dc/terms/";
- protected static final String NS_SIOC = "http://rdfs.org/sioc/ns#";
- protected static final String NS_SKOS = "http://www.w3.org/2004/02/skos/core#";
- protected static final String NS_RSS = "http://purl.org/rss/1.0/";
- protected static final String NS_RSS_CONTENT = "http://purl.org/rss/1.0/modules/content/";
- protected static final String NS_RSS_SY = "http://purl.org/rss/1.0/modules/syndication/";
- protected static final String NS_ADMIN = "http://webns.net/mvcb/";
- protected static final String NS_FOAF = "http://xmlns.com/foaf/0.1/";
- protected static final String NS_GEO = "http://www.w3.org/2003/01/geo/wgs84_pos#";
- protected static final String NS_MA = "http://www.w3.org/ns/ma-ont#";
-
-
- // Local reference to the ValueFactory, set by the constructor and by setValueFactory;
- // createDateProperty uses it to build date literals.
- protected ValueFactory valueFactory;
-
-
- /**
- * Creates a new FeedParserBase that will use a {@link org.openrdf.model.impl.ValueFactoryImpl} to
- * create RDF model objects. Delegates to the constructor taking a ValueFactory.
- */
- protected FeedParserBase() {
- this(new ValueFactoryImpl());
- }
-
- /**
- * Creates a new FeedParserBase that will use the supplied ValueFactory to
- * create RDF model objects. The factory is handed to the superclass and is
- * also kept in the local {@code valueFactory} field.
- *
- * @param valueFactory A ValueFactory.
- */
- protected FeedParserBase(ValueFactory valueFactory) {
- super(valueFactory);
- this.valueFactory = valueFactory;
- }
-
- /**
- * Replaces the ValueFactory, updating both the superclass and the local
- * {@code valueFactory} field so that the two stay in sync.
- *
- * @param valueFactory the ValueFactory to use from now on
- */
- @Override
- public void setValueFactory(ValueFactory valueFactory) {
- super.setValueFactory(valueFactory);
- this.valueFactory = valueFactory;
- }
-
-
- /**
-  * Emits the contents of a Dublin Core module as dcterms properties of the given resource.
-  * Most fields become plain string literals; dates become date literals, relations and sources
-  * become URI-valued properties, and subjects are delegated to parseDCSubject.
-  *
-  * @param resource the feed or entry the module belongs to
-  * @param dcModule the ROME Dublin Core module
-  * @throws RDFHandlerException propagated from the RDF handler
-  * @throws RDFParseException   propagated from the statement-creation helpers
-  */
- protected void parseDCModule(Resource resource, DCModule dcModule) throws RDFHandlerException, RDFParseException {
-     createStringProperties(resource, NS_DC_TERMS + "contributor", dcModule.getContributors());
-     createStringProperties(resource, NS_DC_TERMS + "coverage", dcModule.getCoverages());
-     createStringProperties(resource, NS_DC_TERMS + "creator", dcModule.getCreators());
-     for(Date date : dcModule.getDates()) {
-         createDateProperty(resource, NS_DC_TERMS + "date", date);
-     }
-     createStringProperties(resource, NS_DC_TERMS + "description", dcModule.getDescriptions());
-     createStringProperties(resource, NS_DC_TERMS + "format", dcModule.getFormats());
-     createStringProperties(resource, NS_DC_TERMS + "identifier", dcModule.getIdentifiers());
-     createStringProperties(resource, NS_DC_TERMS + "language", dcModule.getLanguages());
-     createStringProperties(resource, NS_DC_TERMS + "publisher", dcModule.getPublishers());
-     for(String relation : dcModule.getRelations()) {
-         createUrlProperty(resource, NS_DC_TERMS + "relation", relation);
-     }
-     createStringProperties(resource, NS_DC_TERMS + "rights", dcModule.getRightsList());
-     for(String source : dcModule.getSources()) {
-         createUrlProperty(resource, NS_DC_TERMS + "source", source);
-     }
-     for(DCSubject subject : dcModule.getSubjects()) {
-         parseDCSubject(resource, subject);
-     }
-     createStringProperties(resource, NS_DC_TERMS + "title", dcModule.getTitles());
-     createStringProperties(resource, NS_DC_TERMS + "type", dcModule.getTypes());
- }
-
- /**
-  * Calls createStringProperty once per value, in iteration order.
-  */
- private void createStringProperties(Resource resource, String rdfProperty, Iterable<String> values) throws RDFHandlerException, RDFParseException {
-     for(String value : values) {
-         createStringProperty(resource, rdfProperty, value);
-     }
- }
-
- protected void parseContentModule(Resource resource, ContentModule contentModule) throws RDFHandlerException, RDFParseException {
- for(Object content : contentModule.getEncodeds()) {
- createStringProperty(resource,NS_RSS_CONTENT + "encoded",(String)content);
- }
-
- // TODO: more sophisticated forms are nowadays rarely used, we do not support them
- if(contentModule.getContentItems() != null && contentModule.getContentItems().size() > 0) {
- log.warn("content items are not supported yet");
- }
- }
-
- protected void parseGeoModule(Resource resource, GeoRSSModule geoRSSModule) throws RDFParseException, RDFHandlerException {
- if(geoRSSModule.getPosition() != null) {
- Resource r_location = createBNode();
- Resource t_adr = createURI(NS_GEO + "Point");
- URI p_type = createURI(NS_RDF + "type");
- rdfHandler.handleStatement(createStatement(r_location,p_type,t_adr));
-
- createDoubleProperty(r_location,NS_GEO+"latitude",geoRSSModule.getPosition().getLatitude());
- createDoubleProperty(r_location,NS_GEO+"longitude",geoRSSModule.getPosition().getLongitude());
-
-
- rdfHandler.handleStatement(createStatement(resource,createURI(NS_DC_TERMS + "spatial"),r_location));
-
- }
- }
-
- protected void parseMediaModule(Resource resource, MediaEntryModule mediaEntryModule) throws RDFParseException, RDFHandlerException {
- for(MediaContent content : mediaEntryModule.getMediaContents()) {
- if(content.getReference() != null && content.getReference() instanceof UrlReference) {
- URI r_media = createURI(((UrlReference) content.getReference()).getUrl().toString());
- rdfHandler.handleStatement(createStatement(r_media, createURI(NS_RDF + "type"), createURI(NS_MA + "MediaResource")));
- rdfHandler.handleStatement(createStatement(r_media, createURI(NS_MA + "locator"), r_media));
-
- if(content.getBitrate() != null)
- createDoubleProperty(r_media, NS_MA + "averageBitRate", content.getBitrate());
- if(content.getDuration() != null)
- createLongProperty(r_media, NS_MA + "duration", content.getDuration());
-
- createStringProperty(r_media, NS_MA + "hasFormat", content.getType());
-
- if(content.getFramerate() != null)
- createDoubleProperty(r_media, NS_MA + "frameRate", content.getFramerate());
-
- if(content.getHeight() != null)
- createIntProperty(r_media, NS_MA + "frameHeight", content.getHeight());
- if(content.getWidth() != null)
- createIntProperty(r_media, NS_MA + "frameWidth", content.getWidth());
-
- createStringProperty(r_media, NS_MA + "hasLanguage", content.getLanguage());
-
- if(content.getMetadata() != null) {
- Metadata metadata = content.getMetadata();
-
- createStringProperty(r_media, NS_MA + "title", metadata.getTitle());
- createStringProperty(r_media, NS_MA + "copyright", metadata.getCopyright());
- createStringProperty(r_media, NS_MA + "description", metadata.getDescription());
-
- for(String keyword : metadata.getKeywords()) {
- createStringProperty(r_media, NS_MA + "hasKeyword", keyword);
- }
-
- }
-
- rdfHandler.handleStatement(createStatement(resource, createURI(NS_SIOC+"hasPart"), r_media));
- }
- }
-
- }
-
-
-
- protected void parseDCSubject(Resource resource, DCSubject category) throws RDFHandlerException, RDFParseException {
- if(category.getValue() == null) {
- return;
- }
-
- try {
- Resource skosConcept;
- if(category.getTaxonomyUri() != null && category.getValue() != null) {
- // create a skos:Concept with the domain as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getValue(),"UTF-8");
- String namespace = category.getTaxonomyUri();
- skosConcept = createURI(namespace+(namespace.endsWith("/") || namespace.endsWith("#")?"":"/")+localName);
- } else {
- // create a skos:Concept with the baseUri as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getValue(), "UTF-8");
- skosConcept = resolveURI(localName);
- }
- createUrlProperty(skosConcept,NS_RDF + "type", NS_SKOS+"Concept");
- createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getValue());
- rdfHandler.handleStatement(createStatement(resource,createURI(NS_SIOC + "topic"),skosConcept));
- } catch (UnsupportedEncodingException e) {
- throw new RDFParseException(e);
- }
-
-
- // add category value as dc:subject
- createStringProperty(resource, NS_DC_TERMS + "subject", category.getValue());
-
- }
-
-
- protected void createStringProperty(Resource resource, String rdfProperty, String value) throws RDFParseException, RDFHandlerException {
- if(value != null && !"".equals(value.trim())) {
- URI p_description = createURI(rdfProperty);
- Literal v_description = createLiteral(value, null, null);
- rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
- }
- }
-
- protected void createIntProperty(Resource resource, String rdfProperty, int value) throws RDFParseException, RDFHandlerException {
- URI p_description = createURI(rdfProperty);
- Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#int"));
- rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
- }
-
- protected void createLongProperty(Resource resource, String rdfProperty, long value) throws RDFParseException, RDFHandlerException {
- URI p_description = createURI(rdfProperty);
- Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#long"));
- rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
- }
-
- protected void createDoubleProperty(Resource resource, String rdfProperty, double value) throws RDFParseException, RDFHandlerException {
- URI p_description = createURI(rdfProperty);
- Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#double"));
- rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
- }
-
-
- protected void createDateProperty(Resource resource, String rdfProperty, Date value) throws RDFParseException, RDFHandlerException {
- if(value != null) {
- URI p_dateprop = createURI(rdfProperty);
- Literal v_dateprop = valueFactory.createLiteral(getXMLCalendar(value,null));
- rdfHandler.handleStatement(createStatement(resource,p_dateprop,v_dateprop));
- }
- }
-
-
- protected void createUrlProperty(Resource resource, String rdfProperty, String value) throws RDFParseException, RDFHandlerException {
- if(value != null) {
- URI p_description = createURI(rdfProperty);
- URI v_description = createURI(value);
- rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
- }
- }
-
- protected void createUrlProperty(Resource resource, String rdfProperty, Resource value) throws RDFParseException, RDFHandlerException {
- if(value != null) {
- URI p_description = createURI(rdfProperty);
- rdfHandler.handleStatement(createStatement(resource,p_description,value));
- }
- }
-
-
- /**
-  * Converts a date into an XMLGregorianCalendar.
-  * <p/>
-  * Previously a missing DatatypeFactory implementation was swallowed and null was returned;
-  * createDateProperty passes the result straight on to the ValueFactory, so the failure is now
-  * reported where it happens, with the original exception as cause.
-  *
-  * @param date     the date to convert; must not be null
-  * @param timezone the time zone of the resulting calendar, or null for the default time zone
-  * @return the calendar representing {@code date}, never null
-  * @throws IllegalStateException if no DatatypeFactory implementation can be instantiated
-  */
- protected static XMLGregorianCalendar getXMLCalendar(Date date, TimeZone timezone) {
-     GregorianCalendar calendar = new GregorianCalendar();
-     calendar.setTime(date);
-     if(timezone != null) {
-         calendar.setTimeZone(timezone);
-     }
-     try {
-         return DatatypeFactory.newInstance().newXMLGregorianCalendar(calendar);
-     } catch (DatatypeConfigurationException e) {
-         throw new IllegalStateException("no javax.xml.datatype.DatatypeFactory implementation available", e);
-     }
- }
-
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParser.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParser.java b/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParser.java
deleted file mode 100644
index 68d281b..0000000
--- a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParser.java
+++ /dev/null
@@ -1,383 +0,0 @@
-/**
- * Copyright (C) 2013 Salzburg Research.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package at.newmedialab.sesame.rio.rss;
-
-import com.google.common.base.Preconditions;
-import com.sun.syndication.feed.WireFeed;
-import com.sun.syndication.feed.module.DCModule;
-import com.sun.syndication.feed.module.Module;
-import com.sun.syndication.feed.module.SyModule;
-import com.sun.syndication.feed.rss.Category;
-import com.sun.syndication.feed.rss.Channel;
-import com.sun.syndication.feed.rss.Enclosure;
-import com.sun.syndication.feed.rss.Item;
-import com.sun.syndication.io.FeedException;
-import com.sun.syndication.io.WireFeedInput;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.rometools.feed.module.content.ContentModule;
-import org.rometools.feed.module.georss.GeoRSSModule;
-import org.rometools.feed.module.mediarss.MediaEntryModule;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.xml.sax.InputSource;
-
-import java.io.*;
-import java.net.URLEncoder;
-
-/**
- * Parse RSS feed into RDF. Uses the following vocabularies:
- * <ul>
- * <li>dcterms for representing most metadata about feeds and entries</li>
- * <li>sioc for type information and relation between concepts</li>
- * <li>skos for representing categories associated with items or channels</li>
- * <li>media ontology for representing information from the mediarss extension</li>
- * <li>wgs84 geo ontology for representing information from the georss extension</li>
- * </ul>
- * RSS properties without a good corresponding vocabulary are copied 1:1 using the rss namespace itself.
- * <p/>
- * Author: Sebastian Schaffert
- */
-public final class RSSParser extends FeedParserBase {
-
- private static Logger log = LoggerFactory.getLogger(RSSParser.class);
-
-
- /**
- * Creates a new RSSParser that will use a {@link org.openrdf.model.impl.ValueFactoryImpl} to
- * create RDF model objects. Delegates to {@link #RSSParser(ValueFactory)}.
- */
- public RSSParser() {
- this(new ValueFactoryImpl());
- }
-
- /**
- * Creates a new RSSParser that will use the supplied ValueFactory to
- * create RDF model objects.
- *
- * @param valueFactory the ValueFactory handed to the superclass constructor and stored on this parser
- */
- public RSSParser(ValueFactory valueFactory) {
- super(valueFactory);
- // NOTE(review): the superclass constructor presumably stores the factory already - confirm before removing
- this.valueFactory = valueFactory;
- }
-
-
-
- /**
- * Gets the RDF format that this parser can parse.
- *
- * @return {@link RSSFormat#FORMAT}
- */
- @Override
- public RDFFormat getRDFFormat() {
- return RSSFormat.FORMAT;
- }
-
-
- /**
-  * Reads an RSS document from a byte stream and reports its content as RDF statements.
-  * The stream is wrapped in a SAX InputSource and handed to ROME's WireFeedInput.
-  *
-  * @param in      The InputStream from which to read the data.
-  * @param baseURI The URI associated with the data in the InputStream; must not be null.
-  * @throws java.io.IOException If an I/O error occurred while data was read from the InputStream.
-  * @throws org.openrdf.rio.RDFParseException
-  *         If ROME fails to build a feed from the data or the feed is not an RSS channel.
-  * @throws org.openrdf.rio.RDFHandlerException
-  *         If the configured statement handler has encountered an unrecoverable error.
-  */
- @Override
- public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
-     Preconditions.checkNotNull(baseURI);
-
-     setBaseURI(baseURI);
-
-     WireFeedInput feedInput = new WireFeedInput();
-     final WireFeed parsed;
-     try {
-         parsed = feedInput.build(new InputSource(in));
-     } catch (FeedException e) {
-         throw new RDFParseException(e);
-     }
-
-     // only RSS channels are accepted by this parser
-     if(!(parsed instanceof Channel)) {
-         throw new RDFParseException("data stream is not an RSS feed");
-     }
-     parseFeed((Channel) parsed);
- }
-
- /**
-  * Reads an RSS document from a character stream and reports its content as RDF statements.
-  * The reader is handed directly to ROME's WireFeedInput.
-  *
-  * @param reader  The Reader from which to read the data.
-  * @param baseURI The URI associated with the data in the Reader; must not be null.
-  * @throws java.io.IOException If an I/O error occurred while data was read from the Reader.
-  * @throws org.openrdf.rio.RDFParseException
-  *         If ROME fails to build a feed from the data or the feed is not an RSS channel.
-  * @throws org.openrdf.rio.RDFHandlerException
-  *         If the configured statement handler has encountered an unrecoverable error.
-  */
- @Override
- public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
-     Preconditions.checkNotNull(baseURI);
-
-     setBaseURI(baseURI);
-
-     WireFeedInput feedInput = new WireFeedInput();
-     final WireFeed parsed;
-     try {
-         parsed = feedInput.build(reader);
-     } catch (FeedException e) {
-         throw new RDFParseException(e);
-     }
-
-     // only RSS channels are accepted by this parser
-     if(!(parsed instanceof Channel)) {
-         throw new RDFParseException("data stream is not an RSS feed");
-     }
-     parseFeed((Channel) parsed);
- }
-
-
-
- /**
-  * Translates a single RSS item into RDF statements and links it to the feed that contains it.
-  * <p/>
-  * The item resource is identified by the item's uri or, when that is absent, by its link. Items
-  * that have neither are skipped with a warning because no resource URI can be built for them.
-  *
-  * @param entry  the ROME item to translate
-  * @param r_feed the resource of the feed containing the item
-  * @throws RDFParseException   propagated from the URI and statement helper methods
-  * @throws RDFHandlerException if the statement handler fails
-  */
- private void parseFeedEntry(final Item entry, final Resource r_feed) throws RDFParseException, RDFHandlerException {
-
-     final String entryURI = entry.getUri() != null ? entry.getUri() : entry.getLink();
-     if(entryURI == null) {
-         // same policy as the feed-level check in parseFeed: without uri or link nothing identifies the item
-         log.warn("item '{}' has neither uri nor link to reference, skipping it", entry.getTitle());
-         return;
-     }
-
-     URI r_entry = createURI(entryURI);
-     URI rdf_type = createURI(NS_RDF + "type");
-
-
-     // add type sioc:Post
-     rdfHandler.handleStatement(createStatement(r_entry, rdf_type, createURI(NS_SIOC + "Post")));
-
-     // add as sioc:container_of from parent feed
-     rdfHandler.handleStatement(createStatement(r_feed, createURI(NS_SIOC + "container_of"), r_entry));
-     rdfHandler.handleStatement(createStatement(r_entry, createURI(NS_SIOC + "has_container"), r_feed));
-
-     createStringProperty(r_entry, NS_DC_TERMS + "creator", entry.getAuthor());
-
-     for(Object category : entry.getCategories()) {
-         parseCategory(r_entry, (Category)category);
-     }
-
-     createUrlProperty(r_entry, NS_SIOC + "has_discussion", entry.getComments());
-
-     if(entry.getContent() != null) {
-         createStringProperty(r_entry, NS_RSS_CONTENT + "encoded", entry.getContent().getValue());
-         createStringProperty(r_entry, NS_RSS_CONTENT + "format", entry.getContent().getType());
-     }
-
-     if(entry.getDescription() != null) {
-         createStringProperty(r_entry, NS_DC_TERMS + "description", entry.getDescription().getValue());
-     }
-
-     // enclosures relate items to media resources used; we use dcterms:hasPart to link to them
-     for(Enclosure enclosure : entry.getEnclosures()) {
-         createUrlProperty(r_entry, NS_DC_TERMS + "hasPart", enclosure.getUrl());
-     }
-
-     // for the expiration date we use dc:valid; it is a bit underspecified :-(
-     createDateProperty(r_entry, NS_DC_TERMS + "valid", entry.getExpirationDate());
-
-     // GUID is sometimes a URL but the documentation says this cannot be guaranteed, so we use dc:identifier;
-     // the guid element is optional in RSS, so items without one must not cause a NullPointerException
-     if(entry.getGuid() != null) {
-         createStringProperty(r_entry, NS_DC_TERMS + "identifier", entry.getGuid().getValue());
-     }
-
-     // for the link we use sioc:link
-     createUrlProperty(r_entry, NS_SIOC + "link", entry.getLink());
-
-     for(Module module : entry.getModules()) {
-         if(module instanceof DCModule) {
-             parseDCModule(r_entry, (DCModule)module);
-         } else if(module instanceof GeoRSSModule) {
-             parseGeoModule(r_entry, (GeoRSSModule)module);
-         } else if(module instanceof MediaEntryModule) {
-             parseMediaModule(r_entry, (MediaEntryModule)module);
-         } else if(module instanceof ContentModule) {
-             parseContentModule(r_entry, (ContentModule)module);
-         } else {
-             log.warn("module {} not supported yet", module.getUri());
-         }
-
-         // TODO: add support for more modules!
-     }
-
-     // publication date is dc:issued
-     createDateProperty(r_entry, NS_DC_TERMS + "issued", entry.getPubDate());
-
-     // if the source is present, we link just to the URL using dc:source and ignore the text
-     if(entry.getSource() != null) {
-         createUrlProperty(r_entry, NS_DC_TERMS + "source", entry.getSource().getUrl());
-     }
-
-     // title is dc:title
-     createStringProperty(r_entry, NS_DC_TERMS + "title", entry.getTitle());
-
-     log.debug("parsed RSS item {}", r_entry.stringValue());
- }
-
- /**
- * Translate an RSS channel (ROME Channel representation) into RDF statements, including all of its items.
- * <p/>
- * The channel uri, or its link when no uri is set, becomes the URI of the feed resource. When both
- * are missing an error is logged and nothing is emitted.
- *
- * @param feed the ROME RSS channel representation
- */
- private void parseFeed(final Channel feed) throws RDFParseException, RDFHandlerException {
- if (log.isInfoEnabled()) {
- log.info("importing entries from {} feed '{}' found at '{}'",new Object[] {feed.getFeedType(),feed.getTitle(),feed.getUri()});
- }
-
- final String feedUri = feed.getUri() != null ? feed.getUri() : feed.getLink();
- if (feedUri == null) {
- log.error("feed '{}' has neither uri nor link to reference", feed.getTitle());
- return;
- }
-
- // we set some namespaces first
- setNamespace(NS_DC_TERMS,"dcterms");
- setNamespace(NS_RSS_SY,"sy");
- setNamespace(NS_RSS_CONTENT,"content");
- setNamespace(NS_SIOC,"sioc");
-
- URI r_feed = createURI(feedUri);
- URI rdf_type = createURI(NS_RDF + "type");
-
- // add type sioc:Forum
- rdfHandler.handleStatement(createStatement(r_feed, rdf_type, createURI(NS_SIOC + "Forum")));
- createUrlProperty(r_feed, NS_SIOC + "feed", feedUri);
-
- // add all categories that are present
- for(Category category : feed.getCategories()) {
- parseCategory(r_feed,category);
- }
-
- // if feed.getCloud() present, we add its specifications using the RSS namespace
- if(feed.getCloud() != null) {
- createStringProperty(r_feed, NS_RSS + "cloudUpdateProtocol", feed.getCloud().getProtocol());
- createStringProperty(r_feed, NS_RSS + "cloudUpdateDomain", feed.getCloud().getDomain());
- createStringProperty(r_feed, NS_RSS + "cloudUpdatePath", feed.getCloud().getPath());
- createStringProperty(r_feed, NS_RSS + "cloudUpdateProcedure", feed.getCloud().getRegisterProcedure());
- createIntProperty(r_feed, NS_RSS + "cloudUpdatePort", feed.getCloud().getPort());
- }
-
- // add dcterms:rights for feed.getCopyright()
- createStringProperty(r_feed, NS_DC_TERMS + "rights", feed.getCopyright());
-
- // add dcterms:description for feed.getDescription()
- createStringProperty(r_feed, NS_DC_TERMS + "description", feed.getDescription());
-
- // ignore feed.getDocs()
-
- // add dcterms:provenance to point to the software used for generating feed
- createStringProperty(r_feed, NS_DC_TERMS + "provenance", feed.getGenerator());
-
- // add foaf:depiction in case there is an image
- if(feed.getImage() != null)
- createUrlProperty(r_feed, NS_FOAF + "depiction", feed.getImage().getUrl());
-
- // add all feed items; each item emits its own statements before the remaining feed metadata below
- for(Item item : feed.getItems()) {
- parseFeedEntry(item, r_feed);
- }
-
- // add dcterms:language for feed.getLanguage()
- createStringProperty(r_feed, NS_DC_TERMS + "language", feed.getLanguage());
-
- // add dcterms:created for getLastBuildDate()
- createDateProperty(r_feed, NS_DC_TERMS + "created", feed.getLastBuildDate());
-
- // add sioc:link for getLink()
- createUrlProperty(r_feed, NS_SIOC + "link", feed.getLink());
-
- // add dcterms:creator for managing editor
- createStringProperty(r_feed, NS_DC_TERMS + "creator", feed.getManagingEditor());
-
- // only the syndication and Dublin Core modules are handled at feed level; other modules are ignored
- for(Module module : feed.getModules()) {
- if(module instanceof SyModule) {
- SyModule syModule = (SyModule)module;
- createStringProperty(r_feed,NS_RSS_SY + "updatePeriod", syModule.getUpdatePeriod());
- createIntProperty(r_feed, NS_RSS_SY + "updateFrequency", syModule.getUpdateFrequency());
- createDateProperty(r_feed, NS_RSS_SY + "updateBase", syModule.getUpdateBase());
- } else if(module instanceof DCModule) {
- parseDCModule(r_feed, (DCModule)module);
- }
- }
-
- // create publication date as dcterms:issued
- createDateProperty(r_feed, NS_DC_TERMS+"issued",feed.getPubDate());
-
- // PICS is superseded and there is no proper RDF way to do it, so we use an RSS property
- createStringProperty(r_feed, NS_RSS + "rating", feed.getRating());
-
- // skip days and skip hours are also added using RSS vocabulary, they are actually syndication info
- for(String day : feed.getSkipDays()) {
- createStringProperty(r_feed, NS_RSS + "skipDay", day);
- }
- for(Integer hour : feed.getSkipHours()) {
- createIntProperty(r_feed, NS_RSS + "skipHour", hour);
- }
-
- // textinput: we skip it, the documentation says:
- // "The purpose of the <textInput> element is something of a mystery. You can use it to specify a
- // search engine box. Or to allow a reader to provide feedback. Most aggregators ignore it. "
-
- createStringProperty(r_feed, NS_DC_TERMS + "title", feed.getTitle());
-
- // ttl is again meta information about the syndication, we use the RSS namespace;
- // it is only emitted for values greater than zero
- if(feed.getTtl() > 0)
- createIntProperty(r_feed, NS_RSS + "ttl", feed.getTtl());
-
- // add dcterms:publisher for webmaster
- createStringProperty(r_feed, NS_DC_TERMS + "publisher", feed.getWebMaster());
-
- log.info("importing RSS feed finished successfully.");
- }
-
- protected void parseCategory(Resource resource, Category category) throws RDFHandlerException, RDFParseException {
- if(category.getValue() == null) {
- return;
- }
-
- try {
- Resource skosConcept;
- if(category.getDomain() != null && category.getValue() != null) {
- // create a skos:Concept with the domain as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getValue(), "UTF-8");
- String namespace = category.getDomain();
- skosConcept = createURI(namespace+(namespace.endsWith("/") || namespace.endsWith("#")?"":"/")+localName);
- } else {
- // create a skos:Concept with the baseUri as namespace and a local name derived from the value, add it as sioc:topic
- String localName = URLEncoder.encode(category.getValue(), "UTF-8");
- skosConcept = resolveURI(localName);
- }
- createUrlProperty(skosConcept,NS_RDF + "type", NS_SKOS+"Concept");
- createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getValue());
- rdfHandler.handleStatement(createStatement(resource,createURI(NS_SIOC + "topic"),skosConcept));
- } catch (UnsupportedEncodingException e) {
- throw new RDFParseException(e);
- }
-
-
- // add category value as dc:subject
- createStringProperty(resource, NS_DC_TERMS + "subject", category.getValue());
-
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParserFactory.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParserFactory.java b/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParserFactory.java
deleted file mode 100644
index bf07201..0000000
--- a/commons/sesame-tools-rio-rss/src/main/java/at/newmedialab/sesame/rio/rss/RSSParserFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/**
- * Copyright (C) 2013 Salzburg Research.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package at.newmedialab.sesame.rio.rss;
-
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.RDFParserFactory;import java.lang.Override;
-
-/**
- * Factory that creates {@link RSSParser} instances, i.e. parsers that transform RSS into RDF,
- * and reports {@link RSSFormat#FORMAT} as the format it serves.
- * <p/>
- * Author: Sebastian Schaffert
- */
-public class RSSParserFactory implements RDFParserFactory {
-
- /**
- * Returns the RDF format for this factory.
- *
- * @return {@link RSSFormat#FORMAT}
- */
- @Override
- public RDFFormat getRDFFormat() {
- return RSSFormat.FORMAT;
- }
-
- /**
- * Returns a RDFParser instance.
- *
- * @return a new {@link RSSParser} created with its no-argument constructor
- */
- @Override
- public RDFParser getParser() {
- return new RSSParser();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParser.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParser.java b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParser.java
new file mode 100644
index 0000000..ab5ec80
--- /dev/null
+++ b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParser.java
@@ -0,0 +1,420 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.commons.sesame.rio.rss;
+
+import com.google.common.base.Preconditions;
+import com.sun.syndication.feed.WireFeed;
+import com.sun.syndication.feed.atom.*;
+import com.sun.syndication.feed.module.DCModule;
+import com.sun.syndication.feed.module.Module;
+import com.sun.syndication.feed.module.SyModule;
+import com.sun.syndication.io.FeedException;
+import com.sun.syndication.io.WireFeedInput;
+
+import org.apache.marmotta.commons.sesame.rio.rss.AtomFormat;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.RDFParseException;
+import org.rometools.feed.module.content.ContentModule;
+import org.rometools.feed.module.georss.GeoRSSModule;
+import org.rometools.feed.module.mediarss.MediaEntryModule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.InputSource;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+
+/**
+ * Parse Atom feed into RDF. Uses the following vocabularies:
+ * <ul>
+ * <li>dcterms for representing most metadata about feeds and entries</li>
+ * <li>sioc for type information and relation between concepts</li>
+ * <li>skos for representing categories associated with items or channels</li>
+ * <li>media ontology for representing information from the mediarss extension</li>
+ * <li>wgs84 geo ontology for representing information from the georss extension</li>
+ * </ul>
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public class AtomParser extends FeedParserBase {
+
+
+ private static Logger log = LoggerFactory.getLogger(AtomParser.class);
+
+
+ /**
+ * Creates a new AtomParser that will use a {@link org.openrdf.model.impl.ValueFactoryImpl} to
+ * create RDF model objects. Delegates to {@link #AtomParser(ValueFactory)}.
+ */
+ public AtomParser() {
+ this(new ValueFactoryImpl());
+ }
+
+ /**
+ * Creates a new AtomParser that will use the supplied ValueFactory to
+ * create RDF model objects.
+ *
+ * @param valueFactory the ValueFactory handed to the superclass constructor and stored on this parser
+ */
+ public AtomParser(ValueFactory valueFactory) {
+ super(valueFactory);
+ // NOTE(review): the superclass constructor presumably stores the factory already - confirm before removing
+ this.valueFactory = valueFactory;
+ }
+
+
+
+ /**
+ * Gets the RDF format that this parser can parse.
+ *
+ * @return {@link AtomFormat#FORMAT}
+ */
+ @Override
+ public RDFFormat getRDFFormat() {
+ return AtomFormat.FORMAT;
+ }
+
+
+ /**
+ * Parses the data from the supplied InputStream, using the supplied baseURI
+ * to resolve any relative URI references.
+ *
+ * @param in The InputStream from which to read the data.
+ * @param baseURI The URI associated with the data in the InputStream.
+ * @throws java.io.IOException If an I/O error occurred while data was read from the InputStream.
+ * @throws org.openrdf.rio.RDFParseException
+ * If the parser has found an unrecoverable parse error.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the configured statement handler has encountered an
+ * unrecoverable error.
+ */
+ @Override
+ public void parse(InputStream in, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
+ Preconditions.checkNotNull(baseURI);
+
+ setBaseURI(baseURI);
+
+ WireFeedInput input = new WireFeedInput();
+ try {
+ WireFeed feed = input.build(new InputSource(in));
+ if(feed instanceof Feed) {
+ parseFeed((Feed) feed);
+ } else {
+ throw new RDFParseException("data stream is not an RSS feed");
+ }
+ } catch (FeedException e) {
+ throw new RDFParseException(e);
+ }
+ }
+
+ /**
+ * Parses the data from the supplied Reader, using the supplied baseURI to
+ * resolve any relative URI references.
+ *
+ * @param reader The Reader from which to read the data.
+ * @param baseURI The URI associated with the data in the InputStream.
+ * @throws java.io.IOException If an I/O error occurred while data was read from the InputStream.
+ * @throws org.openrdf.rio.RDFParseException
+ * If the parser has found an unrecoverable parse error.
+ * @throws org.openrdf.rio.RDFHandlerException
+ * If the configured statement handler has encountered an
+ * unrecoverable error.
+ */
+ @Override
+ public void parse(Reader reader, String baseURI) throws IOException, RDFParseException, RDFHandlerException {
+ Preconditions.checkNotNull(baseURI);
+
+ setBaseURI(baseURI);
+
+ WireFeedInput input = new WireFeedInput();
+ try {
+ WireFeed feed = input.build(reader);
+ if(feed instanceof Feed) {
+ parseFeed((Feed) feed);
+ } else {
+ throw new RDFParseException("data stream is not an RSS feed");
+ }
+ } catch (FeedException e) {
+ throw new RDFParseException(e);
+ }
+ }
+
+
+
+ /**
+  * Translates a single Atom entry into RDF statements and links it to the feed that contains it.
+  * <p/>
+  * The entry id is used as URI of the entry resource. Entries without an id are skipped with a
+  * warning because no resource URI can be built for them.
+  *
+  * @param entry  the ROME Atom entry to translate
+  * @param r_feed the resource of the feed containing the entry
+  * @throws RDFParseException   propagated from the URI and statement helper methods
+  * @throws RDFHandlerException if the statement handler fails
+  */
+ private void parseFeedEntry(final Entry entry, final Resource r_feed) throws RDFParseException, RDFHandlerException {
+
+     final String entryURI = entry.getId();
+     if(entryURI == null) {
+         // same policy as the feed-level check in parseFeed: without an id nothing identifies the entry
+         log.warn("entry '{}' has no id to reference, skipping it", entry.getTitle());
+         return;
+     }
+
+     URI r_entry = createURI(entryURI);
+     URI rdf_type = createURI(NS_RDF + "type");
+
+
+     // add type sioc:Post
+     rdfHandler.handleStatement(createStatement(r_entry, rdf_type, createURI(NS_SIOC + "Post")));
+
+     // add as sioc:container_of from parent feed
+     rdfHandler.handleStatement(createStatement(r_feed, createURI(NS_SIOC + "container_of"), r_entry));
+     rdfHandler.handleStatement(createStatement(r_entry, createURI(NS_SIOC + "has_container"), r_feed));
+
+     // for each alternate link we create a sioc:link
+     for(Link link : entry.getAlternateLinks()) {
+         createUrlProperty(r_entry,NS_SIOC + "link", resolveURI(link.getHref()));
+     }
+
+     // add all authors as dc:creator
+     for(Person person : entry.getAuthors()) {
+         parsePerson(r_entry, person, "creator");
+     }
+
+     for(Object category : entry.getCategories()) {
+         parseCategory(r_entry, (Category)category);
+     }
+
+     // every content element is exported both with the RSS content vocabulary and as dc:description
+     for(Content content : entry.getContents()) {
+         createStringProperty(r_entry, NS_RSS_CONTENT + "encoded", content.getValue());
+         createStringProperty(r_entry, NS_RSS_CONTENT + "format", content.getType());
+         createStringProperty(r_entry, NS_DC_TERMS + "description",content.getValue());
+     }
+
+     // add all contributors as dc:contributor
+     for(Person person : entry.getContributors()) {
+         parsePerson(r_entry, person, "contributor");
+     }
+
+     createDateProperty(r_entry, NS_DC_TERMS + "created", entry.getCreated());
+
+     // ignore foreign markup
+
+     createDateProperty(r_entry, NS_DC_TERMS + "issued", entry.getPublished());
+     createDateProperty(r_entry, NS_DC_TERMS + "modified", entry.getUpdated());
+
+
+     // the id doubles as resource URI above; it is additionally recorded as dc:identifier
+     createStringProperty(r_entry, NS_DC_TERMS + "identifier", entryURI);
+
+
+     for(Object module : entry.getModules()) {
+         if(module instanceof DCModule) {
+             parseDCModule(r_entry, (DCModule)module);
+         } else if(module instanceof GeoRSSModule) {
+             parseGeoModule(r_entry, (GeoRSSModule)module);
+         } else if(module instanceof MediaEntryModule) {
+             parseMediaModule(r_entry, (MediaEntryModule)module);
+         } else if(module instanceof ContentModule) {
+             parseContentModule(r_entry, (ContentModule)module);
+         } else {
+             log.warn("module {} not supported yet", module.toString());
+         }
+
+         // TODO: add support for more modules!
+     }
+
+     // for each remaining (non-alternate) link we create a sioc:link as well
+     for(Link link : entry.getOtherLinks()) {
+         createUrlProperty(r_entry,NS_SIOC + "link", resolveURI(link.getHref()));
+     }
+
+     // copyright information
+     createStringProperty(r_entry, NS_DC_TERMS + "rights", entry.getRights());
+
+     // if the source is present, we link just to its id using dc:source and ignore the text
+     if(entry.getSource() != null) {
+         createUrlProperty(r_entry, NS_DC_TERMS + "source", entry.getSource().getId());
+     }
+
+     if(entry.getSummary() != null) {
+         createStringProperty(r_entry, NS_DC_TERMS + "abstract",entry.getSummary().getValue());
+     }
+
+     // title is dc:title
+     createStringProperty(r_entry, NS_DC_TERMS + "title", entry.getTitle());
+
+     log.debug("parsed Atom item {}", r_entry.stringValue());
+ }
+
+ /**
+  * Translates an Atom feed (ROME Feed representation) into RDF statements, including all of its entries.
+  * <p/>
+  * The feed id is used as URI of the feed resource. When the feed has no id an error is logged and
+  * nothing is emitted.
+  *
+  * @param feed the ROME Atom feed representation
+  * @throws RDFParseException   propagated from the URI and statement helper methods
+  * @throws RDFHandlerException if the statement handler fails
+  */
+ private void parseFeed(final Feed feed) throws RDFParseException, RDFHandlerException {
+     if (log.isInfoEnabled()) {
+         log.info("importing entries from {} feed '{}' found at '{}'",new Object[] {feed.getFeedType(),feed.getTitle(),feed.getId()});
+     }
+
+     final String feedUri = feed.getId();
+     if (feedUri == null) {
+         log.error("feed '{}' has neither uri nor link to reference", feed.getTitle());
+         return;
+     }
+
+     // we set some namespaces first
+     setNamespace(NS_DC_TERMS,"dcterms");
+     setNamespace(NS_RSS_SY,"sy");
+     setNamespace(NS_RSS_CONTENT,"content");
+     setNamespace(NS_SIOC,"sioc");
+
+     URI r_feed = createURI(feedUri);
+     URI rdf_type = createURI(NS_RDF + "type");
+
+     // add type sioc:Forum
+     rdfHandler.handleStatement(createStatement(r_feed, rdf_type, createURI(NS_SIOC + "Forum")));
+     createUrlProperty(r_feed, NS_SIOC + "feed", feedUri);
+
+     // for each alternate link we create a sioc:link
+     for(Link link : feed.getAlternateLinks()) {
+         createUrlProperty(r_feed,NS_SIOC + "link", resolveURI(link.getHref()));
+     }
+
+     // add all authors as dc:creator
+     for(Person person : feed.getAuthors()) {
+         parsePerson(r_feed, person, "creator");
+     }
+
+
+     // add all categories that are present
+     for(Object category : feed.getCategories()) {
+         parseCategory(r_feed, (Category) category);
+     }
+
+     // add all contributors as dc:contributor; this must read the contributor list, not the author
+     // list, otherwise every author is reported a second time as contributor.
+     // Iterating as Object with a cast compiles whether or not the list is declared with a type parameter.
+     for(Object contributor : feed.getContributors()) {
+         parsePerson(r_feed, (Person) contributor, "contributor");
+     }
+
+
+     // add dcterms:provenance to point to the software used for generating the feed;
+     // the generator element is optional, so feeds without one must not cause a NullPointerException
+     if(feed.getGenerator() != null) {
+         createStringProperty(r_feed, NS_DC_TERMS + "provenance", feed.getGenerator().getValue());
+     }
+
+     // add foaf:thumbnail in case there is an icon
+     if(feed.getIcon() != null) {
+         createUrlProperty(r_feed, NS_FOAF + "thumbnail", resolveURI(feed.getIcon()));
+     }
+
+     // add all feed entries; each entry emits its own statements before the remaining feed metadata below
+     for(Entry item : feed.getEntries()) {
+         parseFeedEntry(item, r_feed);
+     }
+
+     // add dc:language for feed.getLanguage()
+     createStringProperty(r_feed, NS_DC_TERMS + "language", feed.getLanguage());
+
+     // add foaf:logo in case there is a logo
+     if(feed.getLogo() != null) {
+         createUrlProperty(r_feed, NS_FOAF + "logo", resolveURI(feed.getLogo()));
+     }
+
+
+
+     // only the syndication and Dublin Core modules are handled at feed level; other modules are ignored
+     for(Module module : feed.getModules()) {
+         if(module instanceof SyModule) {
+             SyModule syModule = (SyModule)module;
+             createStringProperty(r_feed,NS_RSS_SY + "updatePeriod", syModule.getUpdatePeriod());
+             createIntProperty(r_feed, NS_RSS_SY + "updateFrequency", syModule.getUpdateFrequency());
+             createDateProperty(r_feed, NS_RSS_SY + "updateBase", syModule.getUpdateBase());
+         } else if(module instanceof DCModule) {
+             parseDCModule(r_feed, (DCModule)module);
+         }
+     }
+
+     // for each remaining (non-alternate) link we create a sioc:link as well
+     for(Link link : feed.getOtherLinks()) {
+         createUrlProperty(r_feed,NS_SIOC + "link", resolveURI(link.getHref()));
+     }
+     // add dc:rights for feed.getRights()
+     createStringProperty(r_feed, NS_DC_TERMS + "rights", feed.getRights());
+
+     // add dc:description for feed.getSubtitle()
+     if(feed.getSubtitle() != null) {
+         createStringProperty(r_feed, NS_DC_TERMS + "description", feed.getSubtitle().getValue());
+     }
+
+     createStringProperty(r_feed, NS_DC_TERMS + "title", feed.getTitle());
+
+     // add dc:created and dc:issued for update date
+     createDateProperty(r_feed, NS_DC_TERMS + "created", feed.getUpdated());
+     createDateProperty(r_feed, NS_DC_TERMS + "issued", feed.getUpdated());
+
+     log.info("importing Atom feed finished successfully.");
+ }
+
+ protected void parseCategory(Resource resource, Category category) throws RDFHandlerException, RDFParseException {
+ if(category.getTerm() == null) {
+ return;
+ }
+
+ try {
+ Resource skosConcept;
+ if(category.getScheme() != null ) {
+ // create a skos:Concept with the domain as namespace and a local name derived from the value, add it as sioc:topic
+ String localName = URLEncoder.encode(category.getTerm(), "UTF-8");
+ String namespace = category.getScheme();
+ skosConcept = createURI(namespace+(namespace.endsWith("/") || namespace.endsWith("#")?"":"/")+localName);
+ } else {
+ // create a skos:Concept with the baseUri as namespace and a local name derived from the value, add it as sioc:topic
+ String localName = URLEncoder.encode(category.getTerm(), "UTF-8");
+ skosConcept = resolveURI(localName);
+ }
+ createUrlProperty(skosConcept,NS_RDF + "type", NS_SKOS+"Concept");
+ if(category.getLabel() != null) {
+ createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getLabel());
+ } else {
+ createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getTerm());
+ }
+ rdfHandler.handleStatement(createStatement(resource,createURI(NS_SIOC + "topic"),skosConcept));
+ } catch (UnsupportedEncodingException e) {
+ throw new RDFParseException(e);
+ }
+
+
+ // add category value as dc:subject
+ if(category.getLabel() != null) {
+ createStringProperty(resource, NS_DC_TERMS + "subject", category.getLabel());
+ } else {
+ createStringProperty(resource, NS_DC_TERMS + "subject", category.getTerm());
+ }
+
+ }
+
+
+ protected void parsePerson(Resource r_entry, Person person, String relation) throws RDFParseException, RDFHandlerException {
+ if("creator".equals(relation) && (person.getUri() != null || person.getEmail() != null)) {
+ String personUri = person.getUri() != null ? person.getUri() : "mailto:"+person.getEmail();
+ Resource r_person = createURI(personUri);
+ createStringProperty(r_person, NS_FOAF + "name", person.getName());
+ if(person.getEmail() != null) {
+ createUrlProperty(r_person, NS_FOAF + "mbox", "mailto:"+person.getEmail());
+ }
+ createUrlProperty(r_person, NS_FOAF + "homepage", person.getUri());
+
+ rdfHandler.handleStatement(createStatement(r_entry, createURI(NS_FOAF + "maker"), r_person));
+ rdfHandler.handleStatement(createStatement(r_person, createURI(NS_FOAF + "made"), r_entry));
+ }
+ createStringProperty(r_entry,NS_DC_TERMS + relation,person.getName());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParserFactory.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParserFactory.java b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParserFactory.java
new file mode 100644
index 0000000..018e839
--- /dev/null
+++ b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/AtomParserFactory.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.commons.sesame.rio.rss;
+
+import org.apache.marmotta.commons.sesame.rio.rss.AtomFormat;
+import org.openrdf.rio.RDFFormat;
+import org.openrdf.rio.RDFParser;
+import org.openrdf.rio.RDFParserFactory;
+
+/**
+ * {@link RDFParserFactory} for Atom feeds: reports {@link AtomFormat#FORMAT} as the
+ * handled format and hands out {@link AtomParser} instances.
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public class AtomParserFactory implements RDFParserFactory {
+
+    /**
+     * Creates a parser for the format of this factory.
+     *
+     * @return a newly constructed {@link AtomParser} on every call
+     */
+    @Override
+    public RDFParser getParser() {
+        return new AtomParser();
+    }
+
+    /**
+     * Identifies the RDF format handled by this factory.
+     *
+     * @return {@link AtomFormat#FORMAT}
+     */
+    @Override
+    public RDFFormat getRDFFormat() {
+        return AtomFormat.FORMAT;
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-marmotta/blob/21a28cf8/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/FeedParserBase.java
----------------------------------------------------------------------
diff --git a/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/FeedParserBase.java b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/FeedParserBase.java
new file mode 100644
index 0000000..723367e
--- /dev/null
+++ b/commons/sesame-tools-rio-rss/src/main/java/org/apache/marmotta/commons/sesame/rio/rss/FeedParserBase.java
@@ -0,0 +1,315 @@
+/**
+ * Copyright (C) 2013 Salzburg Research.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.marmotta.commons.sesame.rio.rss;
+
+import com.sun.syndication.feed.module.DCModule;
+import com.sun.syndication.feed.module.DCSubject;
+import org.openrdf.model.Literal;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.ValueFactory;
+import org.openrdf.model.impl.ValueFactoryImpl;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.RDFParseException;
+import org.openrdf.rio.helpers.RDFParserBase;
+import org.rometools.feed.module.content.ContentModule;
+import org.rometools.feed.module.georss.GeoRSSModule;
+import org.rometools.feed.module.mediarss.MediaEntryModule;
+import org.rometools.feed.module.mediarss.types.MediaContent;
+import org.rometools.feed.module.mediarss.types.Metadata;
+import org.rometools.feed.module.mediarss.types.UrlReference;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.datatype.DatatypeConfigurationException;
+import javax.xml.datatype.DatatypeFactory;
+import javax.xml.datatype.XMLGregorianCalendar;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
+
+/**
+ * Common functionality for RSS and Atom feed parsing
+ * <p/>
+ * Author: Sebastian Schaffert
+ */
+public abstract class FeedParserBase extends RDFParserBase {
+ private static Logger log = LoggerFactory.getLogger(FeedParserBase.class);
+
+
+ protected static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+ protected static final String NS_DC = "http://purl.org/dc/elements/1.1/";
+ protected static final String NS_DC_TERMS = "http://purl.org/dc/terms/";
+ protected static final String NS_SIOC = "http://rdfs.org/sioc/ns#";
+ protected static final String NS_SKOS = "http://www.w3.org/2004/02/skos/core#";
+ protected static final String NS_RSS = "http://purl.org/rss/1.0/";
+ protected static final String NS_RSS_CONTENT = "http://purl.org/rss/1.0/modules/content/";
+ protected static final String NS_RSS_SY = "http://purl.org/rss/1.0/modules/syndication/";
+ protected static final String NS_ADMIN = "http://webns.net/mvcb/";
+ protected static final String NS_FOAF = "http://xmlns.com/foaf/0.1/";
+ protected static final String NS_GEO = "http://www.w3.org/2003/01/geo/wgs84_pos#";
+ protected static final String NS_MA = "http://www.w3.org/ns/ma-ont#";
+
+
+ protected ValueFactory valueFactory;
+
+
+ /**
+ * Creates a new RDFParserBase that will use a {@link org.openrdf.model.impl.ValueFactoryImpl} to
+ * create RDF model objects.
+ */
+ protected FeedParserBase() {
+ this(new ValueFactoryImpl());
+ }
+
+ /**
+ * Creates a new RDFParserBase that will use the supplied ValueFactory to
+ * create RDF model objects.
+ *
+ * @param valueFactory A ValueFactory.
+ */
+ protected FeedParserBase(ValueFactory valueFactory) {
+ super(valueFactory);
+ this.valueFactory = valueFactory;
+ }
+
+ @Override
+ public void setValueFactory(ValueFactory valueFactory) {
+ super.setValueFactory(valueFactory);
+ this.valueFactory = valueFactory;
+ }
+
+
+ protected void parseDCModule(Resource resource, DCModule dcModule) throws RDFHandlerException, RDFParseException {
+ for(String contributor : dcModule.getContributors()) {
+ createStringProperty(resource, NS_DC_TERMS + "contributor", contributor);
+ }
+ for(String coverage : dcModule.getCoverages()) {
+ createStringProperty(resource, NS_DC_TERMS + "coverage", coverage);
+ }
+ for(String creator : dcModule.getCreators()) {
+ createStringProperty(resource, NS_DC_TERMS + "creator", creator);
+ }
+ for(Date date : dcModule.getDates()) {
+ createDateProperty(resource, NS_DC_TERMS + "date", date);
+ }
+ for(String description : dcModule.getDescriptions()) {
+ createStringProperty(resource, NS_DC_TERMS + "description", description);
+ }
+ for(String format : dcModule.getFormats()) {
+ createStringProperty(resource, NS_DC_TERMS + "format", format);
+ }
+ for(String identifier : dcModule.getIdentifiers()) {
+ createStringProperty(resource, NS_DC_TERMS + "identifier", identifier);
+ }
+ for(String language : dcModule.getLanguages()) {
+ createStringProperty(resource, NS_DC_TERMS + "language", language);
+ }
+ for(String publisher : dcModule.getPublishers()) {
+ createStringProperty(resource, NS_DC_TERMS + "publisher", publisher);
+ }
+ for(String relation : dcModule.getRelations()) {
+ createUrlProperty(resource, NS_DC_TERMS + "relation", relation);
+ }
+ for(String rights : dcModule.getRightsList()) {
+ createStringProperty(resource, NS_DC_TERMS + "rights", rights);
+ }
+ for(String source : dcModule.getSources()) {
+ createUrlProperty(resource, NS_DC_TERMS + "source", source);
+ }
+ for(DCSubject subject : dcModule.getSubjects()) {
+ parseDCSubject(resource, subject);
+ }
+ for(String title : dcModule.getTitles()) {
+ createStringProperty(resource, NS_DC_TERMS + "title", title);
+ }
+ for(String type : dcModule.getTypes()) {
+ createStringProperty(resource, NS_DC_TERMS + "type", type);
+ }
+ }
+
+ protected void parseContentModule(Resource resource, ContentModule contentModule) throws RDFHandlerException, RDFParseException {
+ for(Object content : contentModule.getEncodeds()) {
+ createStringProperty(resource,NS_RSS_CONTENT + "encoded",(String)content);
+ }
+
+ // TODO: more sophisticated forms are nowadays rarely used, we do not support them
+ if(contentModule.getContentItems() != null && contentModule.getContentItems().size() > 0) {
+ log.warn("content items are not supported yet");
+ }
+ }
+
+ protected void parseGeoModule(Resource resource, GeoRSSModule geoRSSModule) throws RDFParseException, RDFHandlerException {
+ if(geoRSSModule.getPosition() != null) {
+ Resource r_location = createBNode();
+ Resource t_adr = createURI(NS_GEO + "Point");
+ URI p_type = createURI(NS_RDF + "type");
+ rdfHandler.handleStatement(createStatement(r_location,p_type,t_adr));
+
+ createDoubleProperty(r_location,NS_GEO+"latitude",geoRSSModule.getPosition().getLatitude());
+ createDoubleProperty(r_location,NS_GEO+"longitude",geoRSSModule.getPosition().getLongitude());
+
+
+ rdfHandler.handleStatement(createStatement(resource,createURI(NS_DC_TERMS + "spatial"),r_location));
+
+ }
+ }
+
+ protected void parseMediaModule(Resource resource, MediaEntryModule mediaEntryModule) throws RDFParseException, RDFHandlerException {
+ for(MediaContent content : mediaEntryModule.getMediaContents()) {
+ if(content.getReference() != null && content.getReference() instanceof UrlReference) {
+ URI r_media = createURI(((UrlReference) content.getReference()).getUrl().toString());
+ rdfHandler.handleStatement(createStatement(r_media, createURI(NS_RDF + "type"), createURI(NS_MA + "MediaResource")));
+ rdfHandler.handleStatement(createStatement(r_media, createURI(NS_MA + "locator"), r_media));
+
+ if(content.getBitrate() != null)
+ createDoubleProperty(r_media, NS_MA + "averageBitRate", content.getBitrate());
+ if(content.getDuration() != null)
+ createLongProperty(r_media, NS_MA + "duration", content.getDuration());
+
+ createStringProperty(r_media, NS_MA + "hasFormat", content.getType());
+
+ if(content.getFramerate() != null)
+ createDoubleProperty(r_media, NS_MA + "frameRate", content.getFramerate());
+
+ if(content.getHeight() != null)
+ createIntProperty(r_media, NS_MA + "frameHeight", content.getHeight());
+ if(content.getWidth() != null)
+ createIntProperty(r_media, NS_MA + "frameWidth", content.getWidth());
+
+ createStringProperty(r_media, NS_MA + "hasLanguage", content.getLanguage());
+
+ if(content.getMetadata() != null) {
+ Metadata metadata = content.getMetadata();
+
+ createStringProperty(r_media, NS_MA + "title", metadata.getTitle());
+ createStringProperty(r_media, NS_MA + "copyright", metadata.getCopyright());
+ createStringProperty(r_media, NS_MA + "description", metadata.getDescription());
+
+ for(String keyword : metadata.getKeywords()) {
+ createStringProperty(r_media, NS_MA + "hasKeyword", keyword);
+ }
+
+ }
+
+ rdfHandler.handleStatement(createStatement(resource, createURI(NS_SIOC+"hasPart"), r_media));
+ }
+ }
+
+ }
+
+
+
+ protected void parseDCSubject(Resource resource, DCSubject category) throws RDFHandlerException, RDFParseException {
+ if(category.getValue() == null) {
+ return;
+ }
+
+ try {
+ Resource skosConcept;
+ if(category.getTaxonomyUri() != null && category.getValue() != null) {
+ // create a skos:Concept with the domain as namespace and a local name derived from the value, add it as sioc:topic
+ String localName = URLEncoder.encode(category.getValue(),"UTF-8");
+ String namespace = category.getTaxonomyUri();
+ skosConcept = createURI(namespace+(namespace.endsWith("/") || namespace.endsWith("#")?"":"/")+localName);
+ } else {
+ // create a skos:Concept with the baseUri as namespace and a local name derived from the value, add it as sioc:topic
+ String localName = URLEncoder.encode(category.getValue(), "UTF-8");
+ skosConcept = resolveURI(localName);
+ }
+ createUrlProperty(skosConcept,NS_RDF + "type", NS_SKOS+"Concept");
+ createStringProperty(skosConcept, NS_SKOS + "prefLabel", category.getValue());
+ rdfHandler.handleStatement(createStatement(resource,createURI(NS_SIOC + "topic"),skosConcept));
+ } catch (UnsupportedEncodingException e) {
+ throw new RDFParseException(e);
+ }
+
+
+ // add category value as dc:subject
+ createStringProperty(resource, NS_DC_TERMS + "subject", category.getValue());
+
+ }
+
+
+ protected void createStringProperty(Resource resource, String rdfProperty, String value) throws RDFParseException, RDFHandlerException {
+ if(value != null && !"".equals(value.trim())) {
+ URI p_description = createURI(rdfProperty);
+ Literal v_description = createLiteral(value, null, null);
+ rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
+ }
+ }
+
+ protected void createIntProperty(Resource resource, String rdfProperty, int value) throws RDFParseException, RDFHandlerException {
+ URI p_description = createURI(rdfProperty);
+ Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#int"));
+ rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
+ }
+
+ protected void createLongProperty(Resource resource, String rdfProperty, long value) throws RDFParseException, RDFHandlerException {
+ URI p_description = createURI(rdfProperty);
+ Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#long"));
+ rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
+ }
+
+ protected void createDoubleProperty(Resource resource, String rdfProperty, double value) throws RDFParseException, RDFHandlerException {
+ URI p_description = createURI(rdfProperty);
+ Literal v_description = createLiteral(""+value, null, createURI("http://www.w3.org/2001/XMLSchema#double"));
+ rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
+ }
+
+
+ protected void createDateProperty(Resource resource, String rdfProperty, Date value) throws RDFParseException, RDFHandlerException {
+ if(value != null) {
+ URI p_dateprop = createURI(rdfProperty);
+ Literal v_dateprop = valueFactory.createLiteral(getXMLCalendar(value,null));
+ rdfHandler.handleStatement(createStatement(resource,p_dateprop,v_dateprop));
+ }
+ }
+
+
+ protected void createUrlProperty(Resource resource, String rdfProperty, String value) throws RDFParseException, RDFHandlerException {
+ if(value != null) {
+ URI p_description = createURI(rdfProperty);
+ URI v_description = createURI(value);
+ rdfHandler.handleStatement(createStatement(resource,p_description,v_description));
+ }
+ }
+
+ protected void createUrlProperty(Resource resource, String rdfProperty, Resource value) throws RDFParseException, RDFHandlerException {
+ if(value != null) {
+ URI p_description = createURI(rdfProperty);
+ rdfHandler.handleStatement(createStatement(resource,p_description,value));
+ }
+ }
+
+
+ protected static XMLGregorianCalendar getXMLCalendar(Date date, TimeZone timezone) {
+ GregorianCalendar c = new GregorianCalendar();
+ c.setTime(date);
+ if(timezone != null)
+ c.setTimeZone(timezone);
+ try {
+ return DatatypeFactory.newInstance().newXMLGregorianCalendar(c);
+ } catch (DatatypeConfigurationException e) {
+ return null;
+ }
+ }
+
+
+
+}