You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/02/24 14:32:21 UTC
svn commit: r1293245 - in /incubator/stanbol/trunk: ./ enhancer/
enhancer/ldpath/ enhancer/ldpath/src/ enhancer/ldpath/src/main/
enhancer/ldpath/src/main/java/ enhancer/ldpath/src/main/java/org/
enhancer/ldpath/src/main/java/org/apache/ enhancer/ldpath...
Author: rwesten
Date: Fri Feb 24 13:32:19 2012
New Revision: 1293245
URL: http://svn.apache.org/viewvc?rev=1293245&view=rev
Log:
STANBOL-500: First version of the LDPath support for ContentItems and the Stanbol Enhancement Structure
See the unit tests for example usages
Added:
incubator/stanbol/trunk/enhancer/ldpath/ (with props)
incubator/stanbol/trunk/enhancer/ldpath/README.txt (with props)
incubator/stanbol/trunk/enhancer/ldpath/pom.xml (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/
incubator/stanbol/trunk/enhancer/ldpath/src/main/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/
incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.rdf.zip (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt (with props)
incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/metadata.rdf.zip (with props)
Modified:
incubator/stanbol/trunk/enhancer/pom.xml
incubator/stanbol/trunk/pom.xml
Propchange: incubator/stanbol/trunk/enhancer/ldpath/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Fri Feb 24 13:32:19 2012
@@ -0,0 +1,7 @@
+.settings
+
+.project
+
+.classpath
+
+target
Added: incubator/stanbol/trunk/enhancer/ldpath/README.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/README.txt?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/README.txt (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/README.txt Fri Feb 24 13:32:19 2012
@@ -0,0 +1,87 @@
+Jersey front-end to the FISE engine
+===================================
+
+Goals for this sub-project:
+
+- RESTful web services API to Stanbol Enhancer for machines.
+
+- Human-friendly HTML interface to quickly test the service and document the API.
+
+
+Building from source
+--------------------
+
+Checkout and build Stanbol Enhancer and Stanbol Entityhub:
+
+ $ svn checkout http://svn.apache.org/repos/asf/incubator/stanbol/trunk/entityhub stanbol-entityhub
+ $ cd stanbol-entityhub
+ $ mvn clean install
+ $ cd ..
+
+ $ svn checkout http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer stanbol-enhancer
+ $ cd stanbol-enhancer
+ $ mvn clean install
+
+
+
+Deployment
+----------
+
+Go to
+ - 'launchers/lite/target' to run the default configuration that comes with
+ a set of engines that is considered as stable
+ - 'launchers/full/target' to run the configuration that contains all available
+ engines.Stanbol
+
+ rm -rf sling && java -jar org.apache.stanbol.enhancer.launchers.sling-*-SNAPSHOT.jar
+
+Once deployed (check the logs) you can either use the HTML interface:
+
+ http://localhost:8080
+
+To submit data to engines using a simple form:
+
+ http://localhost:8080/engines
+
+If that page gives errors such as class not found, no web provider, stop and restart the jersey-server bundle.
+There's a bug in Jersey 1.2 that causes those if the core bundle starts before the server bundle.
+
+You can set up an Apache virtual host to serve a Stanbol Enhancer instance on your
+domain name using a ``NameVirtualHost *:80`` instruction in your global Apache
+configuration and the following virtual host parameters::
+
+ <VirtualHost *:80>
+
+ ServerName stanbol-enhancer.example.com
+
+ CustomLog logs/stanbol-enhancer.example.com.access.log combined
+ ErrorLog logs/stanbol-enhancer.example.com.error.log
+
+ ProxyPass / http://localhost:8080/
+ ProxyPassReverse / http://localhost:8080/
+ ProxyPreserveHost On
+
+ </VirtualHost>
+
+
+Stateless operation
+-------------------
+
+You can use the HTTP (somewhat RESTful) API directly with cURL for instance:
+
+ $ curl -X POST -H "Content-type: text/plain" --data "Paris is a beautiful city." http://localhost:8080/engines/
+
+You can force a specific RDF serialization scheme by setting the "Accept" HTTP header:
+
+ $ curl -X POST -H "Content-type: text/plain" -H "Accept: application/rdf+xml" --data "Paris is a beautiful city." http://localhost:8080/engines/
+
+Or to upload the content of a file:
+
+ $ curl -X POST -H "Content-type: text/plain" -H "Accept: text/rdf+nt" --data @/path/to/my_local_file.txt http://localhost:8080/engines/
+
+
+Asynchronous and stateful operation
+-----------------------------------
+
+TODO: implement and document me!
+
Propchange: incubator/stanbol/trunk/enhancer/ldpath/README.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/pom.xml?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/pom.xml (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/pom.xml Fri Feb 24 13:32:19 2012
@@ -0,0 +1,147 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <version>0.9.0-incubating-SNAPSHOT</version>
+ <relativePath>../parent/pom.xml</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.ldpath</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancer ldpath</name>
+ <description>ldpath support for the Stanbol Enhancer</description>
+
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/ldpath/
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/ldpath/
+ </developerConnection>
+ <url>http://incubator.apache.org/stanbol/</url>
+ </scm>
+
+ <build>
+ <!-- make it an OSGi bundle -->
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Export-Package>
+ org.apache.stanbol.enhancer.ldpath.*
+ </Export-Package>
+ <!--
+ <Embed-Dependency>*;scope=compile|runtime;inline=false;artifactId=
+ </Embed-Dependency>
+ <Embed-Transitive>true</Embed-Transitive>
+ -->
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.rat</groupId>
+ <artifactId>apache-rat-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <!-- AL20 License -->
+ <exclude>src/license/THIRD-PARTY.properties</exclude>
+ </excludes>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+
+ <!-- dependencies on other Stanbol modules -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ </dependency>
+ <!-- Clerezza dependencies -->
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.utils</artifactId>
+ </dependency>
+ <!-- LD-Path -->
+ <dependency>
+ <groupId>at.newmedialab.ldpath</groupId>
+ <artifactId>ldpath-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>at.newmedialab.ldpath</groupId>
+ <artifactId>ldpath-core-bundle</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.ldpath.clerezza</artifactId>
+ </dependency>
+
+ <!-- generic tax -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <!-- for tests -->
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.ontologies</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.indexedgraph</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.jena.parser</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
Propchange: incubator/stanbol/trunk/enhancer/ldpath/pom.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,9 @@
+package org.apache.stanbol.enhancer.ldpath.backend;
+
+/**
+ * Holder for constants shared by the LDPath support of the Stanbol Enhancer.
+ * This class cannot be instantiated.
+ */
+public final class Constants {
+
+    /** Private constructor: this class only provides static constants. */
+    private Constants(){}
+
+    /**
+     * URN prefix <code>urn:apache.stanbol.enhancer:ldpath.property:</code>.
+     * NOTE(review): presumably the namespace used for content item related
+     * LDPath properties - confirm with the author. The constant is public
+     * because a private constant in a class without any other member could
+     * not be read by anyone.
+     */
+    public static final String CI_NAMESPACE = "urn:apache.stanbol.enhancer:ldpath.property:";
+
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/Constants.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,238 @@
+package org.apache.stanbol.enhancer.ldpath.backend;
+
+import static java.util.Collections.emptyMap;
+import static java.util.Collections.unmodifiableMap;
+import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.getContentParts;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.net.URI;
+import java.util.Collection;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.utils.UnionMGraph;
+import org.apache.stanbol.commons.ldpath.clerezza.ClerezzaBackend;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+
+/**
+ * Basically a {@link ClerezzaBackend} over {@link ContentItem#getMetadata()}
+ * that ensures read locks to be used for queries on subjects and objects.
+ * @author Rupert Westenthaler
+ *
+ */
+public class ContentItemBackend implements RDFBackend<Resource>{
+
+ private final Logger log = LoggerFactory.getLogger(ContentItemBackend.class);
+
+ private static final Map<UriRef,TripleCollection> EMPTY_INCLUDED = emptyMap();
+
+ private final ContentItem ci;
+ private final Lock readLock;
+ private final ClerezzaBackend backend;
+ private final Map<UriRef,TripleCollection> included;
+
+ /**
+ * Creates a {@link RDFBackend} over the {@link ContentItem#getMetadata()
+ * metadata} of the parsed content item.
+ * @param ci the content item
+ */
+ public ContentItemBackend(ContentItem ci) {
+ this(ci,false);
+ }
+ /**
+ * Creates a {@link RDFBackend} over the {@link ContentItem#getMetadata()
+ * metadata} and all {@link ContentItem#getPart(int, Class) content parts}
+ * compatible to {@link TripleCollection}
+ * @param ci the content item
+ * @param includeAdditionalMetadata if <code>true</code> the {@link RDFBackend}
+ * will also include RDF data stored in content parts
+ */
+ public ContentItemBackend(ContentItem ci, boolean includeAdditionalMetadata){
+ included = includeAdditionalMetadata ?
+ unmodifiableMap(getContentParts(ci, TripleCollection.class)) :
+ EMPTY_INCLUDED;
+ MGraph graph;
+ if(!included.isEmpty()){
+ graph = ci.getMetadata();
+ } else {
+ TripleCollection[] tcs = new TripleCollection[included.size()+1];
+ tcs[0] = ci.getMetadata();
+ System.arraycopy(tcs, 1, included.values().toArray(), 0, included.size());
+ graph = new UnionMGraph(tcs);
+ }
+ backend = new ClerezzaBackend(graph);
+ this.ci = ci;
+ this.readLock = ci.getLock().readLock();
+ }
+ /**
+ * Creates a {@link RDFBackend} over the {@link ContentItem#getMetadata()
+ * metadata} and RDF data stored in content parts with the parsed URIs.
+ * If no content part for a parsed URI exists or its type is not compatible
+ * to {@link TripleCollection} it will be not included.
+ * @param ci the content item
+ * @param includedMetadata the URIs for the content parts to include
+ */
+ public ContentItemBackend(ContentItem ci, Set<UriRef> includedMetadata){
+ Map<UriRef,TripleCollection> included = new LinkedHashMap<UriRef,TripleCollection>();
+ for(UriRef ref : includedMetadata){
+ try {
+ TripleCollection metadata = ci.getPart(ref, TripleCollection.class);
+ included.put(ref, metadata);
+ } catch (RuntimeException e) {
+ log.warn("Unable to add requested Metadata-ContentPart "+ref+" to" +
+ "ContentItemBackend "+ci.getUri(),e);
+ }
+ }
+ this.included = unmodifiableMap(included);
+ MGraph graph;
+ if(!included.isEmpty()){
+ graph = ci.getMetadata();
+ } else {
+ TripleCollection[] tcs = new TripleCollection[included.size()+1];
+ tcs[0] = ci.getMetadata();
+ System.arraycopy(tcs, 1, included.values().toArray(), 0, included.size());
+ graph = new UnionMGraph(tcs);
+ }
+ backend = new ClerezzaBackend(graph);
+ this.ci = ci;
+ this.readLock = ci.getLock().readLock();
+ }
+
+
+ @Override
+ public Collection<Resource> listObjects(Resource subject, Resource property) {
+ readLock.lock();
+ try {
+ return backend.listObjects(subject, property);
+ } finally {
+ readLock.unlock();
+ }
+ }
+
+ @Override
+ public Collection<Resource> listSubjects(Resource property, Resource object) {
+ readLock.lock();
+ try {
+ return backend.listSubjects(property, object);
+ } finally {
+ readLock.unlock();
+ }
+ }
+ /**
+ * Getter for the content item
+ * @return the content item
+ */
+ public ContentItem getContentItem(){
+ return ci;
+ }
+ /**
+ * Getter for the read-only map of the content parts included in this
+ * RDF backend
+ * @return the content parts included in this {@link RDFBackend}
+ */
+ public Map<UriRef,TripleCollection> getIncludedMetadata(){
+ return included;
+ }
+
+ @Override
+ public boolean isLiteral(Resource n) {
+ return backend.isLiteral(n);
+ }
+ @Override
+ public boolean isURI(Resource n) {
+ return backend.isURI(n);
+ }
+ @Override
+ public boolean isBlank(Resource n) {
+ return backend.isBlank(n);
+ }
+ @Override
+ public Locale getLiteralLanguage(Resource n) {
+ return backend.getLiteralLanguage(n);
+ }
+ @Override
+ public URI getLiteralType(Resource n) {
+ return backend.getLiteralType(n);
+ }
+ @Override
+ public Resource createLiteral(String content) {
+ return backend.createLiteral(content);
+ }
+ @Override
+ public Resource createLiteral(String content, Locale language, URI type) {
+ return backend.createLiteral(content, language, type);
+ }
+ @Override
+ public Resource createURI(String uri) {
+ return backend.createURI(uri);
+ }
+ @Override
+ public String stringValue(Resource node) {
+ return backend.stringValue(node);
+ }
+ @Override
+ public Double doubleValue(Resource node) {
+ return backend.doubleValue(node);
+ }
+ @Override
+ public Long longValue(Resource node) {
+ return backend.longValue(node);
+ }
+ @Override
+ public Boolean booleanValue(Resource node) {
+ return backend.booleanValue(node);
+ }
+ @Override
+ public Date dateTimeValue(Resource node) {
+ return backend.dateTimeValue(node);
+ }
+ @Override
+ public Date dateValue(Resource node) {
+ return backend.dateValue(node);
+ }
+ @Override
+ public Date timeValue(Resource node) {
+ return backend.timeValue(node);
+ }
+ @Override
+ public Float floatValue(Resource node) {
+ return backend.floatValue(node);
+ }
+ @Override
+ public Integer intValue(Resource node) {
+ return backend.intValue(node);
+ }
+ @Override
+ public BigInteger integerValue(Resource node) {
+ return backend.integerValue(node);
+ }
+ @Override
+ public BigDecimal decimalValue(Resource node) {
+ return backend.decimalValue(node);
+ }
+
+ /* NO SUPPORT FOR THREADING REQUIRED */
+ @Override
+ public boolean supportsThreading() {
+ return false;
+ }
+ @Override
+ public ThreadPoolExecutor getThreadPool() {
+ return null;
+ }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/backend/ContentItemBackend.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,106 @@
+package org.apache.stanbol.enhancer.ldpath.function;
+
+import static at.newmedialab.ldpath.util.Collections.iterator;
+import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.parseMimeType;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.functions.SelectorFunction;
+
+/**
+ * Provides access to the contents stored in {@link Blob}s added as content parts
+ * to a contentItem.<p>
+ * The function is registered under the name <code>content</code>. All
+ * parameters are interpreted as media types. If no (valid) media type is
+ * passed the {@link ContentItem#getBlob() main blob} of the content item is
+ * used; otherwise the blob is looked up via
+ * {@link ContentItemHelper#getBlob(ContentItem, Set)}. Blobs that define a
+ * <code>charset</code> parameter are returned as plain literal holding the
+ * decoded text; all other blobs are returned as typed literal created from
+ * the raw bytes.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class ContentFunction extends ContentItemFunction implements SelectorFunction<Resource> {
+
+    Logger log = LoggerFactory.getLogger(ContentFunction.class);
+    LiteralFactory lf = LiteralFactory.getInstance();
+
+    public ContentFunction(){
+        super("content");
+    }
+
+    /**
+     * Selects the content of a single {@link Blob} of the content item.
+     * @param backend the backend providing the content item
+     * @param args the media types of the requested content (optional)
+     * @return a singleton collection with the content of the selected blob
+     * or an empty collection if no blob was found
+     * @throws IllegalStateException if reading the content of the blob fails
+     */
+    @Override
+    public Collection<Resource> apply(ContentItemBackend backend, Collection<Resource>... args) throws IllegalArgumentException {
+        ContentItem ci = backend.getContentItem();
+        //TODO: Wait for ld-path to parse the context
+        //      http://code.google.com/p/ldpath/issues/detail?id=7
+        //      As soon as this is available the first parameter needs to be
+        //      checked for the context (a URI) and - if present - cut from the
+        //      args. NOTE: the disabled implementation of this used
+        //      System.arraycopy(args, 0, tmp, 0, tmp.length) what would keep
+        //      the context and drop the last parameter; the copy needs to
+        //      start at index 1. If no context is parsed the URI of the
+        //      ContentItem needs to be used as context.
+        Set<String> mimeTypes = parseMimeTypes(backend, args);
+        Blob blob;
+        if(mimeTypes.isEmpty()){
+            blob = ci.getBlob();
+        } else {
+            Entry<UriRef,Blob> entry = ContentItemHelper.getBlob(ci, mimeTypes);
+            blob = entry != null ? entry.getValue() : null;
+        }
+        if(blob == null){
+            return java.util.Collections.<Resource>emptySet();
+        }
+        try {
+            return java.util.Collections.singleton(readContent(backend, blob));
+        } catch (IOException e) {
+            throw new IllegalStateException("Unable to read contents from Blob '"
+                + blob.getMimeType()+"' of ContentItem "+ci.getUri(),e);
+        }
+    }
+
+    /**
+     * Collects the media types of all passed parameters. Values that can not
+     * be parsed as media type are logged and ignored.
+     * @return the media types; empty if none (or only invalid) were passed
+     */
+    private Set<String> parseMimeTypes(ContentItemBackend backend, Collection<Resource>[] args){
+        Set<String> mimeTypes = new HashSet<String>();
+        if(args == null || args.length < 1){
+            return mimeTypes;
+        }
+        for(Iterator<Resource> params = iterator(args);params.hasNext();){
+            Resource param = params.next();
+            String mediaTypeString = backend.stringValue(param);
+            try {
+                // the media type itself is stored with the null key
+                mimeTypes.add(parseMimeType(mediaTypeString).get(null));
+            } catch (IllegalArgumentException e) {
+                log.warn(String.format("Invalid mediaType '%s' (based on RFC 2046) parsed!",
+                    mediaTypeString),e);
+            }
+        }
+        return mimeTypes;
+    }
+
+    /**
+     * Reads the content of the passed blob and ensures that the stream is
+     * closed afterwards.
+     * @return a plain literal if the blob defines a charset; otherwise a
+     * typed literal created from the binary data
+     */
+    private Resource readContent(ContentItemBackend backend, Blob blob) throws IOException {
+        String charset = blob.getParameter().get("charset");
+        java.io.InputStream in = blob.getStream();
+        try {
+            if(charset != null){
+                return backend.createLiteral(IOUtils.toString(in, charset));
+            } else { //binary content
+                return lf.createTypedLiteral(IOUtils.toByteArray(in));
+            }
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+    }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentFunction.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,49 @@
+package org.apache.stanbol.enhancer.ldpath.function;
+
+import java.util.Collection;
+
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+import at.newmedialab.ldpath.api.functions.SelectorFunction;
+
+/**
+ * Base class for functions that require a {@link ContentItemBackend}. It
+ * checks if the {@link RDFBackend} passed to
+ * {@link #apply(RDFBackend, Collection...)} is an instance of
+ * {@link ContentItemBackend} and forwards the call to
+ * {@link #apply(ContentItemBackend, Collection...)}. It also implements the
+ * {@link #getPathExpression(RDFBackend)} method by returning the name passed
+ * to the constructor.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public abstract class ContentItemFunction implements SelectorFunction<Resource> {
+
+    private final String name;
+
+    /**
+     * @param name the name returned by {@link #getPathExpression(RDFBackend)}
+     * @throws IllegalArgumentException if the name is <code>null</code> or empty
+     */
+    protected ContentItemFunction(String name){
+        if(name == null || name.isEmpty()){
+            throw new IllegalArgumentException("The parsed name MUST NOT be NULL nor empty!");
+        }
+        this.name = name;
+    }
+
+    /**
+     * Forwards the call to {@link #apply(ContentItemBackend, Collection...)}.
+     * @throws IllegalArgumentException if the passed backend is
+     * <code>null</code> or not a {@link ContentItemBackend}
+     */
+    @Override
+    public final Collection<Resource> apply(RDFBackend<Resource> backend, Collection<Resource>... args) throws IllegalArgumentException {
+        if(backend instanceof ContentItemBackend){
+            return apply((ContentItemBackend)backend, args);
+        }
+        // null-safe: a missing backend must be reported with the same
+        // IllegalArgumentException and not with a NullPointerException
+        throw new IllegalArgumentException("This ContentFunction can only be " +
+            "used in combination with an RDFBackend of type '"+
+            ContentItemBackend.class.getSimpleName()+"' (parsed Backend: "+
+            (backend == null ? null : backend.getClass())+")!");
+    }
+
+    /**
+     * Applies this function on a {@link ContentItemBackend}.
+     * @param backend the backend (never <code>null</code> when called via
+     * {@link #apply(RDFBackend, Collection...)})
+     * @param args the parameters of the function
+     * @return the selected resources
+     */
+    public abstract Collection<Resource> apply(ContentItemBackend backend,Collection<Resource>... args);
+
+    @Override
+    public String getPathExpression(RDFBackend<Resource> backend) {
+        return name;
+    }
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/ContentItemFunction.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,61 @@
+package org.apache.stanbol.enhancer.ldpath.function;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+import at.newmedialab.ldpath.api.functions.SelectorFunction;
+import at.newmedialab.ldpath.api.selectors.NodeSelector;
+
+/**
+ * Maps a {@link NodeSelector} to a function name. This is useful to provide
+ * function shortcuts for longer ld-path statements.
+ * @author Rupert Westenthaler
+ *
+ * @param <Node>
+ */
+public class PathFunction<Node> implements SelectorFunction<Node> {
+
+ private final String name;
+ private final NodeSelector<Node> selector;
+
+ /**
+ * create a function available under fn:{name} for the parsed selector
+ * @param name the name of the function MUST NOT be <code>null</code> nor
+ * empty
+ * @param selector the selector MUST NOT be <code>null</code>
+ * @throws IllegalArgumentException if the parsed name is <code>null</code>
+ * or empty; if the parsed {@link NodeSelector} is <code>null</code>.
+ */
+ public PathFunction(String name, NodeSelector<Node> selector){
+ if(name == null || name.isEmpty()){
+ throw new IllegalArgumentException("The parsed function name MUST NOT be NULL nor empty!");
+ }
+ this.name = name;
+ if(selector == null){
+ throw new IllegalArgumentException("The parsed NodeSelector MUST NOT be NULL!");
+ }
+ this.selector = selector;
+ }
+
+ @Override
+ public Collection<Node> apply(RDFBackend<Node> backend, Collection<Node>... args) throws IllegalArgumentException {
+ if(args == null || args.length < 1 || args[0] == null || args[0].isEmpty()){
+ throw new IllegalArgumentException("The 'fn:"+name+"' function " +
+ "requires at least a single none empty parameter (the context). Use 'fn:" +
+ name+"(.)' to execute it on the path context!");
+ }
+ Set<Node> selected = new HashSet<Node>();
+ for(Node context : args[0]){
+ selected.addAll(selector.select(backend, context));
+ }
+ return selected;
+ }
+
+ @Override
+ public String getPathExpression(RDFBackend<Node> backend) {
+ return name;
+ }
+
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/PathFunction.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,214 @@
+package org.apache.stanbol.enhancer.ldpath.function;
+
+import static java.util.Collections.singletonMap;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.Resource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+import at.newmedialab.ldpath.api.functions.SelectorFunction;
+import at.newmedialab.ldpath.api.selectors.NodeSelector;
+import at.newmedialab.ldpath.model.transformers.IntTransformer;
+import at.newmedialab.ldpath.model.transformers.StringTransformer;
+
+public class SuggestionFunction implements SelectorFunction<Resource> {
+
+ private static final Comparator<Entry<Double,Resource>> SUGGESTION_COMPARATOR =
+ new Comparator<Entry<Double,Resource>>() {
+
+ @Override
+ public int compare(Entry<Double,Resource> e1, Entry<Double,Resource> e2) {
+ return e2.getKey().compareTo(e1.getKey());
+ }
+
+ };
+ private static final int MISSING_CONFIDENCE_FIRST = -1;
+ private static final int MISSING_CONFIDENCE_FILTER = 0;
+ private static final int MISSING_CONFIDENCE_LAST = -1;
+ private static final int DEFAULT_MISSING_CONFIDENCE_MODE = MISSING_CONFIDENCE_FILTER;
+ private static final Double MAX = Double.valueOf(Double.POSITIVE_INFINITY);
+ private static final Double MIN = Double.valueOf(Double.NEGATIVE_INFINITY);
+// private static final String ANNOTATION_PROCESSING_MODE_SINGLE = "single";
+// private static final String ANNOTATION_PROCESSING_MODE_UNION = "union";
+// private static final String DEFAULT_ANNOTATION_PROCESSING_MODE = ANNOTATION_PROCESSING_MODE_SINGLE;
+
+ Logger log = LoggerFactory.getLogger(SuggestionFunction.class);
+
+ private final String name;
+ private final IntTransformer<Resource> intTransformer;
+ private final StringTransformer<Resource> stringTransformer;
+ private final NodeSelector<Resource> suggestionSelector;
+ private final NodeSelector<Resource> confidenceSelector;
+ private final NodeSelector<Resource> resultSelector;
+ public SuggestionFunction(String name,
+ NodeSelector<Resource> suggestionSelector,
+ NodeSelector<Resource> confidenceSelector){
+ this(name,null,suggestionSelector,confidenceSelector);
+ }
+ public SuggestionFunction(String name,
+ NodeSelector<Resource> suggestionSelector,
+ NodeSelector<Resource> confidenceSelector,
+ NodeSelector<Resource> resultSelector) {
+ intTransformer = new IntTransformer<Resource>();
+ stringTransformer = new StringTransformer<Resource>();
+ if(name == null || name.isEmpty()){
+ throw new IllegalArgumentException("The parsed function name MUST NOT be NULL nor empty!");
+ }
+ this.name = name;
+ if(suggestionSelector == null){
+ throw new IllegalArgumentException("The NodeSelector used to select the Suggestions for the parsed Context MUST NOT be NULL!");
+ }
+ this.suggestionSelector = suggestionSelector;
+ if(confidenceSelector == null){
+ throw new IllegalArgumentException("The NodeSelector used to select the Confidence for Suggestions MUST NOT be NULL!");
+ }
+ this.confidenceSelector = confidenceSelector;
+ this.resultSelector = resultSelector;
+ }
+
+ @Override
+ public Collection<Resource> apply(final RDFBackend<Resource> backend, Collection<Resource>... args) throws IllegalArgumentException {
+ Integer limit = parseParamLimit(backend, args,1);
+// final String processingMode = parseParamProcessingMode(backend, args,2);
+ final int missingConfidenceMode = parseParamMissingConfidenceMode(backend, args,2);
+ List<Resource> result = new ArrayList<Resource>();
+// if(processingMode.equals(ANNOTATION_PROCESSING_MODE_UNION)){
+ processAnnotations(backend, args[0], limit, missingConfidenceMode, result);
+// } else {
+// for(Resource context : args[0]){
+// processAnnotations(backend, singleton(context),
+// limit, missingConfidenceMode, result);
+// }
+// }
+ return result;
+ }
+ /**
+ * Suggestions are selected by all Annotations returned by the parsed
+ * {@link #annotationSelector}.
+ * @param backend
+ * @param annotations suggestions are selected for the union of the parsed
+ * annotations - the {limit} most linked entities for the parsed
+ * list of annotations.
+ * @param limit the maximum number of suggestions for the parsed collection
+ * of annotations.
+ * @param missingConfidenceMode
+ * @param result results are added to this list.
+ */
+ private void processAnnotations(final RDFBackend<Resource> backend,
+ Collection<Resource> annotations,
+ Integer limit,
+ final int missingConfidenceMode,
+ List<Resource> result) {
+ List<Entry<Double,Resource>> suggestions = new ArrayList<Entry<Double,Resource>>();
+ for(Resource annotation : annotations){
+ for(Resource suggestion : suggestionSelector.select(backend, annotation)){
+ Collection<Resource> cs = confidenceSelector.select(backend, suggestion);
+ Double confidence = !cs.isEmpty() ? backend.doubleValue(cs.iterator().next()) :
+ missingConfidenceMode == MISSING_CONFIDENCE_FILTER ?
+ null : missingConfidenceMode == MISSING_CONFIDENCE_FIRST ?
+ MAX : MIN;
+ if(confidence != null){
+ suggestions.add(singletonMap(confidence,suggestion).entrySet().iterator().next());
+
+ }
+ }
+ }
+ Collections.sort(suggestions, SUGGESTION_COMPARATOR);
+ int resultSize = limit != null ? Math.min(limit, suggestions.size()) : suggestions.size();
+ for(Entry<Double,Resource> suggestion : suggestions.subList(0, resultSize)){
+ if(resultSelector == null){
+ result.add(suggestion.getValue());
+ } else {
+ result.addAll(resultSelector.select(backend, suggestion.getValue()));
+ }
+ }
+ }
+ /*
+ * Helper Method to parse the parameter
+ */
+ /**
+ * @param backend
+ * @param args
+ * @return
+ */
+ private int parseParamMissingConfidenceMode(final RDFBackend<Resource> backend,
+ Collection<Resource>[] args, int index) {
+ final int missingConfidenceMode;
+ if(args.length > index && !args[index].isEmpty()){
+ String mode = stringTransformer.transform(backend, args[index].iterator().next());
+ if("first".equalsIgnoreCase(mode)){
+ missingConfidenceMode = MISSING_CONFIDENCE_FIRST;
+ } else if("last".equalsIgnoreCase(mode)){
+ missingConfidenceMode = MISSING_CONFIDENCE_LAST;
+ } else if("filter".equalsIgnoreCase(mode)){
+ missingConfidenceMode = MISSING_CONFIDENCE_FILTER;
+ } else {
+ missingConfidenceMode = DEFAULT_MISSING_CONFIDENCE_MODE;
+ log.warn("Unknown value for parameter 'missing confidence value mode' '{}'" +
+ "(supported: 'first','last','filter') use default: 'filter')",mode);
+ }
+ } else {
+ missingConfidenceMode = DEFAULT_MISSING_CONFIDENCE_MODE;
+ }
+ return missingConfidenceMode;
+ }
+// /**
+// * @param backend
+// * @param args
+// * @return
+// */
+// private String parseParamProcessingMode(final RDFBackend<Resource> backend, Collection<Resource>[] args, int index) {
+// final String processingMode;
+// if(args.length > index && !args[index].isEmpty()){
+// String mode = stringTransformer.transform(backend, args[index].iterator().next());
+// if(ANNOTATION_PROCESSING_MODE_SINGLE.equalsIgnoreCase(mode)){
+// processingMode = ANNOTATION_PROCESSING_MODE_SINGLE;
+// } else if(ANNOTATION_PROCESSING_MODE_UNION.equalsIgnoreCase(mode)) {
+// processingMode = ANNOTATION_PROCESSING_MODE_UNION;
+// } else {
+// processingMode = DEFAULT_ANNOTATION_PROCESSING_MODE;
+// log.warn("Unknown value for parameter 'annotation processing mode' '{}'" +
+// "(supported: 'single','union') default: 'single')",mode);
+// }
+// } else {
+// processingMode = DEFAULT_ANNOTATION_PROCESSING_MODE;
+// }
+// return processingMode;
+// }
+ /**
+ * @param backend
+ * @param args
+ * @return
+ */
+ private Integer parseParamLimit(final RDFBackend<Resource> backend, Collection<Resource>[] args,int index) {
+ Integer limit = null;
+ if(args.length > index && !args[index].isEmpty()){
+ Resource value = args[index].iterator().next();
+ try {
+ limit = intTransformer.transform(backend, value);
+ if(limit < 1){
+ limit = null;
+ }
+ } catch (RuntimeException e) {
+ log.warn("Unable to parse parameter 'limit' form the 2nd argument '{}'",value);
+ }
+ }
+ return limit;
+ }
+
+
+ @Override
+ public String getPathExpression(RDFBackend<Resource> backend) {
+ return name;
+ }
+
+
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/SuggestionFunction.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,94 @@
+package org.apache.stanbol.enhancer.ldpath.function;
+
+import static org.apache.stanbol.enhancer.ldpath.utils.Utils.parseSelector;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.Resource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+import at.newmedialab.ldpath.api.functions.SelectorFunction;
+import at.newmedialab.ldpath.api.selectors.NodeSelector;
+import at.newmedialab.ldpath.model.selectors.PropertySelector;
+import at.newmedialab.ldpath.parser.ParseException;
+
+ /**
+  * LDPath selector function ('fn:textAnnotation') that applies the selector
+  * <code>^{extracted-from}[{rdf:type} is {TextAnnotation}]</code> to every
+  * node of the parsed context and returns the union of the selected nodes.
+  */
+ public class TextAnnotationFunction implements SelectorFunction<Resource> {
+
+     private final Logger log = LoggerFactory.getLogger(TextAnnotationFunction.class);
+
+     private static final String FUNCTION_NAME = "textAnnotation";
+     /** the selector applied to the context nodes (parsed once on class initialisation) */
+     private static NodeSelector<Resource> selector;
+     static {
+         final String ldPath = String.format("^%s[%s is %s]",
+             ENHANCER_EXTRACTED_FROM,RDF_TYPE,ENHANCER_TEXTANNOTATION);
+         try {
+             selector = parseSelector(ldPath);
+         } catch (ParseException parseError) {
+             final String message = "Unable to parse the ld-path selector '" +
+                     ldPath + "'used by the 'fn:" + FUNCTION_NAME + "'!";
+             throw new IllegalStateException(message, parseError);
+         }
+     }
+     /** only referenced by the currently deactivated dc:type filter (see apply) */
+     private static NodeSelector<Resource> dcTypeSelector = new PropertySelector<Resource>(DC_TYPE);
+
+     public TextAnnotationFunction() {
+     }
+
+     /**
+      * Selects the text annotations for all nodes of the first argument.
+      * @param backend the backend parsed to the selector
+      * @param args the arguments; the first one MUST BE the none empty context
+      * @return the set of selected nodes
+      * @throws IllegalArgumentException if no none empty context is parsed as
+      * first argument
+      */
+     @Override
+     public Collection<Resource> apply(RDFBackend<Resource> backend, Collection<Resource>... args) {
+         final boolean hasContext = args != null && args.length >= 1
+                 && args[0] != null && !args[0].isEmpty();
+         if(!hasContext){
+             throw new IllegalArgumentException("The 'fn:"+FUNCTION_NAME+"' function " +
+                 "requires at least a single none empty parameter (the context). Use 'fn:" +
+                 FUNCTION_NAME+"(.)' to execute it on the path context!");
+         }
+         final Collection<Resource> contextNodes = args[0];
+         final Set<Resource> selected = new HashSet<Resource>();
+         for(Resource contextNode : contextNodes){
+             Collection<Resource> selectedForContext = selector.select(backend, contextNode);
+             selected.addAll(selectedForContext);
+         }
+ //      NOTE: parsing of the dc:type as parameter is deactivated for now.
+ //            See the NOTES within this commented section for details why.
+ //        final UriRef dcTypeConstraint;
+ //        if(args.length < 2 || args[1].isEmpty()){
+ //            dcTypeConstraint = null;
+ //        } else {
+ //            /*
+ //             * NOTES:
+ //             *
+ //             *  * Parameters MUST BE parsed as Literals, because otherwise LDPATH
+ //             *    would execute them rather than directly parsing them
+ //             *  * Namespace prefixes can not be supported for URIs parsed as
+ //             *    Literals, because the prefix mappings are only known by the
+ //             *    ldpath parser and not available to this component.
+ //             */
+ //            Resource value = args[1].iterator().next();
+ //            if(value instanceof Literal){
+ //                dcTypeConstraint = new UriRef(((Literal)value).getLexicalForm());
+ //            } else {
+ //                log.warn("Unable to use dc:type constraint {} (value MUST BE a Literal)!",value);
+ //                dcTypeConstraint = null;
+ //            }
+ //        }
+ //        if(dcTypeConstraint != null){
+ //            NodeTest<Resource> dcTypeFilter = new PathEqualityTest<Resource>(dcTypeSelector, dcTypeConstraint);
+ //            Iterator<Resource> it = textAnnotations.iterator();
+ //            while(it.hasNext()){
+ //                if(!dcTypeFilter.apply(backend, Collections.singleton(it.next()))){
+ //                    it.remove();
+ //                }
+ //            }
+ //        }
+         return selected;
+     }
+
+     /**
+      * @return the name of this function
+      */
+     @Override
+     public String getPathExpression(RDFBackend<Resource> backend) {
+         return FUNCTION_NAME;
+     }
+ }
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/function/TextAnnotationFunction.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,58 @@
+package org.apache.stanbol.enhancer.ldpath.utils;
+
+import static org.apache.stanbol.enhancer.ldpath.EnhancerLDPath.getConfig;
+
+import java.io.StringReader;
+import java.util.Map;
+
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.stanbol.commons.ldpath.clerezza.ClerezzaBackend;
+
+import at.newmedialab.ldpath.api.backend.RDFBackend;
+import at.newmedialab.ldpath.api.selectors.NodeSelector;
+import at.newmedialab.ldpath.parser.ParseException;
+import at.newmedialab.ldpath.parser.RdfPathParser;
+
+ /**
+  * Static helpers for parsing LDPath selectors.
+  */
+ public final class Utils {
+
+     /** Utility class: not intended to be instantiated */
+     private Utils(){}
+
+     /**
+      * Holds the instance lazily created by {@link #getEmptyBackend()}. This
+      * field is <code>null</code> until that method is called for the first
+      * time.
+      */
+     public static RDFBackend<Resource> EMPTY_BACKEND;
+
+     /**
+      * Returns an empty {@link RDFBackend} instance intended to be used to create
+      * {@link RdfPathParser} instances<p>
+      * {@link RDFBackend} has currently two distinct roles <ol>
+      * <li> to traverse the graph ( basically the
+      * {@link RDFBackend#listObjects(Object, Object)} and
+      * {@link RDFBackend#listSubjects(Object, Object)} methods)
+      * <li> to create Nodes and convert Nodes
+      * </ol>
+      * The {@link RdfPathParser} while requiring an {@link RDFBackend} instance
+      * depends only on the 2nd role. Therefore the data managed by the
+      * {@link RDFBackend} instance are of no importance.<p>
+      * The {@link RDFBackend} returned by this method is intended to be only
+      * used for the 2nd purpose and does contain no information!
+      * @return the empty backend (created on the first call)
+      */
+     public static RDFBackend<Resource> getEmptyBackend(){
+         RDFBackend<Resource> emptyBackend = EMPTY_BACKEND;
+         if(emptyBackend == null){
+             // first call: create the backend over an empty graph and remember it
+             emptyBackend = new ClerezzaBackend(new SimpleMGraph());
+             EMPTY_BACKEND = emptyBackend;
+         }
+         return emptyBackend;
+     }
+
+     /**
+      * Parses the LDPath selector without additional namespace mappings.
+      * @param path the LDPath selector to parse
+      * @return the parsed selector
+      * @throws ParseException if the parsed path can not be parsed
+      */
+     public static NodeSelector<Resource> parseSelector(String path) throws ParseException {
+         final Map<String,String> noAdditionalMappings = null;
+         return parseSelector(path, noAdditionalMappings);
+     }
+     /**
+      * Parses the LDPath selector by using a parser created for the
+      * {@link #getEmptyBackend() empty backend} and the Enhancer LDPath
+      * configuration.
+      * @param path the LDPath selector to parse
+      * @param additionalNamespaceMappings mappings parsed to the parser
+      * (may be <code>null</code>)
+      * @return the parsed selector
+      * @throws ParseException if the parsed path can not be parsed
+      */
+     public static NodeSelector<Resource> parseSelector(String path, Map<String,String> additionalNamespaceMappings) throws ParseException {
+         final StringReader pathReader = new StringReader(path);
+         final RdfPathParser<Resource> pathParser = new RdfPathParser<Resource>(
+                 getEmptyBackend(), getConfig(), pathReader);
+         return pathParser.parseSelector(additionalNamespaceMappings);
+     }
+ }
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/main/java/org/apache/stanbol/enhancer/ldpath/utils/Utils.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,343 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.stanbol.enhancer.ldpath;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertFalse;
+import static junit.framework.Assert.assertNotNull;
+import static junit.framework.Assert.assertTrue;
+
+import java.io.BufferedInputStream;
+import java.io.FilterInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.serializedform.ParsingProvider;
+import org.apache.clerezza.rdf.core.serializedform.SupportedFormat;
+import org.apache.clerezza.rdf.jena.parser.JenaParserProvider;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryBlob;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.LDPath;
+import at.newmedialab.ldpath.exception.LDPathParseException;
+
+ /**
+  * Tests the Enhancer LDPath functions against a {@link ContentItem} built
+  * from the test resources "content.html", "content.txt" and
+  * "metadata.rdf.zip".
+  */
+ public class ContentItemBackendTest {
+     /**
+      * Avoids that the parser closes the {@link ZipInputStream} after the
+      * first entry
+      */
+     protected static class UncloseableStream extends FilterInputStream {
+
+         public UncloseableStream(InputStream in) {
+             super(in);
+         }
+         @Override
+         public void close() throws IOException {
+             // intentionally empty: the wrapped stream is closed by its owner
+         }
+     }
+
+     private Logger log = LoggerFactory.getLogger(ContentItemBackendTest.class);
+     private static final Charset UTF8 = Charset.forName("UTF-8");
+     private static LiteralFactory lf = LiteralFactory.getInstance();
+
+     private static String textContent;
+     private static String htmlContent;
+     private static ContentItem ci;
+     private ContentItemBackend backend;
+     private LDPath<Resource> ldpath;
+
+     /**
+      * Builds the {@link ContentItem} shared by all tests: the HTML content
+      * as main content, the plain text content as additional part and the
+      * RDF test data as metadata.
+      */
+     @BeforeClass
+     public static void readTestData() throws IOException {
+         //add the metadata
+         ParsingProvider parser = new JenaParserProvider();
+         //create the content Item with the HTML content
+         MGraph rdfData = parseRdfData(parser,"metadata.rdf.zip");
+         UriRef contentItemId = null;
+         Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
+         while(it.hasNext()){
+             Resource r = it.next().getObject();
+             if(contentItemId == null){
+                 if(r instanceof UriRef){
+                     contentItemId = (UriRef)r;
+                 }
+             } else {
+                 assertEquals("multiple ContentItems IDs contained in the RDF test data",
+                     contentItemId,r);
+             }
+         }
+         assertNotNull("RDF data doe not contain an Enhancement extracted form " +
+                 "the content item",contentItemId);
+
+         byte[] htmlData = readTestResource("content.html", "HTML content not found");
+         ci = new InMemoryContentItem(contentItemId.getUnicodeString(),
+             htmlData, "text/html; charset=UTF-8");
+         htmlContent = new String(htmlData, UTF8);
+         //create a Blob with the text content
+         byte[] textData = readTestResource("content.txt", "Plain text content not found");
+         ci.addPart(new UriRef(ci.getUri().getUnicodeString()+"_text"),
+             new InMemoryBlob(textData, "text/plain; charset=UTF-8"));
+         textContent = new String(textData, UTF8);
+         //add the metadata
+         ci.getMetadata().addAll(rdfData);
+     }
+
+     /**
+      * Reads all bytes of the test resource with the parsed name. The
+      * existence of the resource is asserted BEFORE it is read and the stream
+      * is closed in any case.
+      * @param resourceName the name of the resource
+      * @param notFoundMessage the assertion message used if the resource is missing
+      * @return the data of the resource
+      * @throws IOException on any error while reading the resource
+      */
+     private static byte[] readTestResource(String resourceName, String notFoundMessage) throws IOException {
+         InputStream in = getTestResource(resourceName);
+         assertNotNull(notFoundMessage,in);
+         try {
+             return IOUtils.toByteArray(in);
+         } finally {
+             IOUtils.closeQuietly(in);
+         }
+     }
+
+     /**
+      * Parses all ".rdf" entries of the zip archive with the parsed name
+      * as RDF/XML into a single graph.
+      * @param parser the parser used to parse the entries
+      * @param name the name of the zip archive (a test resource)
+      * @return the graph with the parsed data (asserted to be not empty)
+      * @throws IOException on any error while reading the archive
+      */
+     protected static MGraph parseRdfData(ParsingProvider parser,String name) throws IOException {
+         MGraph rdfData = new IndexedMGraph();
+         InputStream in = getTestResource(name);
+         assertNotNull("File '"+name+"' not found",in);
+         ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(in));
+         try {
+             // the parser must not close the zip stream after the first entry
+             InputStream uncloseable = new UncloseableStream(zipIn);
+             ZipEntry entry;
+             while((entry = zipIn.getNextEntry()) != null){
+                 if(entry.getName().endsWith(".rdf")){
+                     parser.parse(rdfData,uncloseable, SupportedFormat.RDF_XML,null);
+                 }
+             }
+         } finally {
+             zipIn.close();
+         }
+         assertTrue(rdfData.size() > 0);
+         return rdfData;
+     }
+
+     /**
+      * Loads a test resource via the class loader of this class.
+      * @param resourceName the name of the resource
+      * @return the stream or <code>null</code> if the resource was not found
+      */
+     protected static InputStream getTestResource(String resourceName) {
+         InputStream in = ContentItemBackendTest.class.getClassLoader().getResourceAsStream(resourceName);
+         return in;
+     }
+
+     @Before
+     public void initBackend(){
+         if(backend == null){
+             backend = new ContentItemBackend(ci);
+         }
+         if(ldpath == null){
+             ldpath = new LDPath<Resource>(backend, EnhancerLDPath.getConfig());
+         }
+     }
+
+     @Test
+     public void testContent() throws LDPathParseException {
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), "fn:content(\"text/plain\")", null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         Resource r = result.iterator().next();
+         assertTrue(r instanceof Literal);
+         String content = ((Literal)r).getLexicalForm();
+         assertEquals(textContent, content);
+
+         result = ldpath.pathQuery(ci.getUri(), "fn:content(\"text/html\")", null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         r = result.iterator().next();
+         assertTrue(r instanceof Literal);
+         content = ((Literal)r).getLexicalForm();
+         assertEquals(htmlContent, content);
+     }
+     @Test
+     public void testTextAnnotationFunction() throws LDPathParseException {
+         String path = "fn:textAnnotation(.)/fise:selected-text";
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(2, result.size());
+         Set<String> expectedValues = new HashSet<String>(
+                 Arrays.asList("Bob Marley","Paris"));
+         for(Resource r : result){
+             assertTrue(r instanceof Literal);
+             assertTrue(expectedValues.remove(((Literal)r).getLexicalForm()));
+         }
+         assertTrue(expectedValues.isEmpty());
+
+         //test with a filter for the type
+         //same as the 1st example but rather using an ld-path construct for
+         //filtering for TextAnnotations representing persons
+         path = "fn:textAnnotation(.)[dc:type is dbpedia-ont:Person]/fise:selected-text";
+         result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         Resource r = result.iterator().next();
+         assertTrue(r instanceof Literal);
+         assertEquals("Bob Marley", ((Literal)r).getLexicalForm());
+
+     }
+     @Test
+     public void testEntityAnnotation() throws LDPathParseException {
+         String path = "fn:entityAnnotation(.)/fise:entity-reference";
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(4, result.size());
+         Set<UriRef> expectedValues = new HashSet<UriRef>(
+                 Arrays.asList(
+                     new UriRef("http://dbpedia.org/resource/Paris"),
+                     new UriRef("http://dbpedia.org/resource/Bob_Marley"),
+                     new UriRef("http://dbpedia.org/resource/Centre_Georges_Pompidou"),
+                     new UriRef("http://dbpedia.org/resource/Paris,_Texas")));
+         for(Resource r : result){
+             assertTrue(r instanceof UriRef);
+             log.info("Entity: {}",r);
+             assertTrue(expectedValues.remove(r));
+         }
+         assertTrue(expectedValues.isEmpty());
+         //and with a filter
+         path = "fn:entityAnnotation(.)[fise:entity-type is dbpedia-ont:Person]/fise:entity-reference";
+         result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         assertTrue(result.contains(new UriRef("http://dbpedia.org/resource/Bob_Marley")));
+     }
+     @Test
+     public void testEnhancements() throws LDPathParseException {
+         String path = "fn:enhancement(.)";
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(7, result.size());
+         for(Resource r : result){
+             assertTrue(r instanceof UriRef);
+             log.info("Entity: {}",r);
+         }
+         //and with a filter
+         path = "fn:enhancement(.)[rdf:type is fise:TextAnnotation]";
+         result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(3, result.size());
+ //        assertTrue(result.contains(new UriRef("http://dbpedia.org/resource/Bob_Marley")));
+         path = "fn:enhancement(.)/dc:language";
+         result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         Resource r = result.iterator().next();
+         assertTrue(r instanceof Literal);
+         assertEquals("en",((Literal)r).getLexicalForm());
+     }
+     @Test
+     public void testEntitySuggestions() throws LDPathParseException {
+         //NOTE: Sort while supported by fn:suggestion is currently not
+         //      supported by LDPath. Therefore the sort of fn:suggestion can
+         //      currently only ensure that the top most {limit} entities are
+         //      selected if the "limit" parameter is set.
+         // Because of this the test checks first that all three suggestions
+         // for Paris are returned and later that a limit of 2 only returns
+         // the two top most.
+         String path = "fn:textAnnotation(.)[dc:type is dbpedia-ont:Place]/fn:suggestion(.)";
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(3, result.size());
+         Double lowestConfidence = null;
+         //stores the lowest confidence suggestion for the 2nd part of this test
+         UriRef lowestConfidenceSuggestion = null;
+         path = "fise:confidence :: xsd:double";
+         for(Resource r : result){
+             assertTrue(r instanceof UriRef);
+             log.info("confidence: {}",r);
+             Double current = (Double)ldpath.pathTransform(r, path, null).iterator().next();
+             assertNotNull(current);
+             if(lowestConfidence == null || lowestConfidence > current){
+                 lowestConfidence = current;
+                 lowestConfidenceSuggestion = (UriRef) r;
+             }
+         }
+         assertNotNull(lowestConfidenceSuggestion);
+         path = "fn:textAnnotation(.)[dc:type is dbpedia-ont:Place]/fn:suggestion(.,\"2\")";
+         Collection<Resource> result2 = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result2);
+         assertFalse(result2.isEmpty());
+         assertEquals(2, result2.size());
+         //first check that all results of the 2nd query are also part of the first
+         assertTrue(result.containsAll(result2));
+         //secondly check that the lowest confidence suggestion is now missing
+         assertFalse(result2.contains(lowestConfidenceSuggestion));
+     }
+     @Test
+     public void testSuggestedEntity() throws LDPathParseException {
+         //The suggestedEntity function can be used for two usecases
+         //(1) get the {limit} top rated linked Entities per parsed context
+         //    In this example we parse all TextAnnotations
+         //NOTE: '.' MUST BE used as first argument in this case
+         String path = "fn:textAnnotation(.)/fn:suggestedEntity(.,\"1\")";
+         Collection<Resource> result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(2, result.size());
+         Set<UriRef> expectedValues = new HashSet<UriRef>(
+                 Arrays.asList(
+                     new UriRef("http://dbpedia.org/resource/Paris"),
+                     new UriRef("http://dbpedia.org/resource/Bob_Marley")));
+         for(Resource r : result){
+             assertTrue(r instanceof UriRef);
+             log.info("Entity: {}",r);
+             assertTrue(expectedValues.remove(r));
+         }
+         assertTrue(expectedValues.isEmpty());
+
+         //(2) get the {limit} top rated Entities for all Annotations parsed
+         //    as the first argument
+         //NOTE: the selector parsing all Annotations MUST BE used as first
+         //    argument
+         path = "fn:suggestedEntity(fn:textAnnotation(.),\"1\")";
+         result = ldpath.pathQuery(ci.getUri(), path, null);
+         assertNotNull(result);
+         assertFalse(result.isEmpty());
+         assertEquals(1, result.size());
+         assertEquals(new UriRef("http://dbpedia.org/resource/Paris"),
+             result.iterator().next());
+
+     }
+ }
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/ContentItemBackendTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java Fri Feb 24 13:32:19 2012
@@ -0,0 +1,240 @@
+package org.apache.stanbol.enhancer.ldpath;
+
+import static junit.framework.Assert.assertEquals;
+import static junit.framework.Assert.assertNotNull;
+import static org.apache.stanbol.enhancer.ldpath.ContentItemBackendTest.getTestResource;
+import static org.apache.stanbol.enhancer.ldpath.ContentItemBackendTest.parseRdfData;
+import static org.junit.Assert.assertFalse;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringReader;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.serializedform.ParsingProvider;
+import org.apache.clerezza.rdf.jena.parser.JenaParserProvider;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.helper.InMemoryContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import at.newmedialab.ldpath.LDPath;
+import at.newmedialab.ldpath.exception.LDPathParseException;
+import at.newmedialab.ldpath.model.programs.Program;
+
+/**
+ * Uses the "example.*" files to build a contentItem. This contains a big
+ * number of Text/EntityAnnotation and is used here to provide usage examples
+ * of the Stanbol Enhancer LDPath functions.<p>
+ * In addition setting the {@value #ITERATIONS} parameter to a value >= 100
+ * is a good option to do performance testing. Lower values suffer from
+ * JIT optimisation. Indexing times on my Machine (1:340ms , 10:100ms,
+ * 100:66ms, 1000:60ms)
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class UsageExamples {
+
+ private static final Logger log = LoggerFactory.getLogger(UsageExamples.class);
+
+ private static int ITERATIONS = 10;
+
+ private static ContentItem ci;
+ private ContentItemBackend backend;
+ private LDPath<Resource> ldpath;
+ private static double indexingTime;
+
+ @BeforeClass
+ public static void readTestData() throws IOException {
+ //add the metadata
+ ParsingProvider parser = new JenaParserProvider();
+ //parse the RDF enhancement data that is later added to the content item
+ MGraph rdfData = parseRdfData(parser,"example.rdf.zip");
+ UriRef contentItemId = null;
+ Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
+ while(it.hasNext()){
+ Resource r = it.next().getObject();
+ if(contentItemId == null){
+ if(r instanceof UriRef){
+ contentItemId = (UriRef)r;
+ }
+ } else {
+ assertEquals("multiple ContentItems IDs contained in the RDF test data",
+ contentItemId,r);
+ }
+ }
+ assertNotNull("RDF data doe not contain an Enhancement extracted form " +
+ "the content item",contentItemId);
+
+ InputStream in = getTestResource("example.txt");
+ assertNotNull("Example Plain text content not found",in);
+ byte[] textData = IOUtils.toByteArray(in);
+ IOUtils.closeQuietly(in);
+ ci = new InMemoryContentItem(contentItemId.getUnicodeString(),
+ textData, "text/html; charset=UTF-8");
+ ci.getMetadata().addAll(rdfData);
+ }
+ @Before
+ public void initBackend(){
+ if(backend == null){
+ backend = new ContentItemBackend(ci);
+ }
+ if(ldpath == null){
+ ldpath = new LDPath<Resource>(backend, EnhancerLDPath.getConfig());
+ }
+ }
+
+ /**
+ * This provides some example on how to select persons extracted from
+ * a contentItem
+ * @throws LDPathParseException
+ */
+ @Test
+ public void exampleExtractedPersons() throws LDPathParseException {
+ StringBuilder program = new StringBuilder();
+ program.append("personMentions = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Person]/fise:selected-text :: xsd:string;");
+ //this uses the labels of suggested person with the highest confidence
+ //but also the selected-text as fallback if no entity is suggested.
+ program.append("personNames = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Person]/fn:first(fn:suggestion(.,\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
+ program.append("linkedPersons = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Person]/fn:suggestedEntity(.,\"1\") :: xsd:anyURI;");
+ //this selects only linked Artists
+ program.append("linkedArtists = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Person]/fn:suggestion(.)" +
+ "[fise:entity-type is dbpedia-ont:Artist]/fise:entity-reference :: xsd:anyURI;");
+ Program<Resource> personProgram = ldpath.parseProgram(new StringReader(program.toString()));
+ log.info("- - - - - - - - - - - - - ");
+ log.info("Person Indexing Examples");
+ Map<String,Collection<?>> result = execute(personProgram);
+ assertNotNull(result);
+ assertFalse(result.isEmpty());
+ logResults(result);
+ }
+ /**
+ * Execute the ldpath program {@link #ITERATIONS} times and adds the
+ * average execution time to {@link #indexingTime}
+ * @param personProgram
+ * @return the results
+ */
+ private Map<String,Collection<?>> execute(Program<Resource> personProgram) {
+ long start = System.currentTimeMillis();
+ Map<String,Collection<?>> result = personProgram.execute(backend, ci.getUri());
+ for(int i=1;i<ITERATIONS;i++){
+ result = personProgram.execute(backend, ci.getUri());
+ }
+ double duration = ((double)(System.currentTimeMillis()-start))/((double)Math.max(1, ITERATIONS));
+ log.info("processing time {}ms (average over {} iterations)",duration,Math.max(1, ITERATIONS));
+ indexingTime = indexingTime+duration;
+ return result;
+ }
+ /**
+ * This provides some example on how to select places extracted from
+ * a contentItem
+ * @throws LDPathParseException
+ */
+ @Test
+ public void exampleExtractedPlaces() throws LDPathParseException {
+ StringBuilder program = new StringBuilder();
+ program.append("locationMentions = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Place]/fise:selected-text :: xsd:string;");
+ //this uses the labels of suggested places with the highest confidence
+ //but also the selected-text as fallback if no entity is suggested.
+ program.append("locationNames = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Place]/fn:first(fn:suggestion(.,\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
+ program.append("linkedPlaces = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Place]/fn:suggestedEntity(.,\"1\") :: xsd:anyURI;");
+ //this selects only linked Countries
+ program.append("linkedCountries = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Place]/fn:suggestion(.)" +
+ "[fise:entity-type is dbpedia-ont:Country]/fise:entity-reference :: xsd:anyURI;");
+ Program<Resource> personProgram = ldpath.parseProgram(new StringReader(program.toString()));
+ log.info("- - - - - - - - - - - - -");
+ log.info("Places Indexing Examples");
+ Map<String,Collection<?>> result = execute(personProgram);
+ assertNotNull(result);
+ assertFalse(result.isEmpty());
+ logResults(result);
+ }
+ /**
+ * This provides some example on how to select organisations extracted from
+ * a contentItem
+ * @throws LDPathParseException
+ */
+ @Test
+ public void exampleExtractedOrganization() throws LDPathParseException {
+ StringBuilder program = new StringBuilder();
+ program.append("orgMentions = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Organisation]/fise:selected-text :: xsd:string;");
+ //this uses the labels of suggested organisations with the highest confidence
+ //but also the selected-text as fallback if no entity is suggested.
+ program.append("orgNames = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Organisation]/fn:first(fn:suggestion(.,\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
+ program.append("linkedOrgs = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Organisation]/fn:suggestedEntity(.,\"1\") :: xsd:anyURI;");
+ //this selects only linked education organisations
+ //NOTE: this does not use a limit on suggestion(.)!
+ program.append("linkedEducationOrg = fn:textAnnotation(.)" +
+ "[dc:type is dbpedia-ont:Organisation]/fn:suggestion(.)" +
+ "[fise:entity-type is dbpedia-ont:EducationalInstitution]/fise:entity-reference :: xsd:anyURI;");
+ Program<Resource> personProgram = ldpath.parseProgram(new StringReader(program.toString()));
+ log.info("- - - - - - - - - - - - -");
+ log.info("Places Indexing Examples");
+ Map<String,Collection<?>> result = execute(personProgram);
+ assertNotNull(result);
+ assertFalse(result.isEmpty());
+ logResults(result);
+ }
+ /**
+ * This provides some example on how to select concepts extracted from
+ * a contentItem
+ * @throws LDPathParseException
+ */
+ @Test
+ public void exampleExtractedConcepts() throws LDPathParseException {
+ StringBuilder program = new StringBuilder();
+ program.append("conceptNames = fn:entityAnnotation(.)" +
+ "[fise:entity-type is skos:Concept]/fise:entity-label :: xsd:anyURI;");
+ //this selects the entity references (URIs) of the EntityAnnotations
+ //that have skos:Concept as entity-type.
+ program.append("linkedConcepts = fn:entityAnnotation(.)" +
+ "[fise:entity-type is skos:Concept]/fise:entity-reference :: xsd:anyURI;");
+ Program<Resource> personProgram = ldpath.parseProgram(new StringReader(program.toString()));
+ log.info("- - - - - - - - - - - - -");
+ log.info("Concept Indexing Examples");
+ Map<String,Collection<?>> result = execute(personProgram);
+ assertNotNull(result);
+ assertFalse(result.isEmpty());
+ logResults(result);
+ }
+
+ protected static void logResults(Map<String,Collection<?>> result){
+ for(Entry<String,Collection<?>> field : result.entrySet()){
+ log.info("Field {}: {} values",field.getKey(),field.getValue().size());
+ for(Object value : field.getValue()){
+ log.info(" {} (type: '{}')",value,value.getClass().getSimpleName());
+ }
+ }
+ }
+ @AfterClass
+ public static void printDuration(){
+ log.info("- - - - - - - - - - - - - - - - - - - - - - - - - ");
+ log.info("Indexing Time: {}ms (average over {} iterations)",indexingTime,Math.max(1, ITERATIONS));
+ }
+}
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/java/org/apache/stanbol/enhancer/ldpath/UsageExamples.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html Fri Feb 24 13:32:19 2012
@@ -0,0 +1,6 @@
+<html>
+ <body>
+ The <b>Stanbol enhancer</b>b> can detect famous cities such as
+ Paris and people such as Bob Marley.
+ </body>
+</html>
\ No newline at end of file
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.html
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt Fri Feb 24 13:32:19 2012
@@ -0,0 +1 @@
+The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.
\ No newline at end of file
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/content.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.rdf.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.rdf.zip?rev=1293245&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.rdf.zip
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt?rev=1293245&view=auto
==============================================================================
--- incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt (added)
+++ incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt Fri Feb 24 13:32:19 2012
@@ -0,0 +1,32 @@
+Notable citizens of Salzburg
+
+Mozart's birthplace at Getreidegasse 9 The composer Wolfgang Amadeus Mozart was
+born and raised in Salzburg and worked for the archbishops from 1769 to 1781.
+His house of birth and residence are tourist attractions. His family is buried
+in a small church graveyard in the old town, and there are many monuments to
+"Wolferl" in the city. The composer Johann Michael Haydn, brother of the
+composer Joseph Haydn. His works were admired by Mozart and Schubert. He was
+also the teacher of Carl Maria von Weber and Anton Diabelli and is known for his
+sacred music. Christian Doppler, an expert on acoustic theory, was born in
+Salzburg. He is most known for his discovery of the Doppler effect. Josef Mohr
+was born in Salzburg. Together with Franz Gruber, he composed and wrote the text
+for "Silent Night". As a priest in neighbouring Oberndorf he performed the song
+for the first time in 1818. King Otto of Greece was born Prince Otto Friedrich
+Ludwig of Bavaria at the Palace of Mirabell, a few days before the city reverted
+from Bavarian to Austrian rule. Noted writer Stefan Zweig lived in Salzburg for
+about 15 years, until 1934. Maria Von Trapp (later Maria Trapp) and her family
+lived in Salzburg until they fled to the United States following the Nazi
+takeover.
+Salzburg is the birthplace of Hans Makart, a 19th-century Austrian
+painter-decorator and national celebrity. Makartplatz (Makart Square) is named
+in his honour. Writer Thomas Bernhard was raised in Salzburg and spent part of
+his life there. Herbert von Karajan was a notable musician and conductor. He was
+born in Salzburg and died in 1989 in neighbouring Anif. Anthropologist Udo Ludwig
+was born here. Roland Ratzenberger, Formula One driver, was born in Salzburg.
+He died in practice for the 1994 San Marino Grand Prix. Joseph Leutgeb, French
+horn virtuoso Klaus Ager, the distinguished contemporary composer and Mozarteum
+professor, was born in Salzburg on 10 May 1946. Alex Jesaulenko, Australian
+rules footballer and AFL Hall of Fame player with "Legend" status was born in
+Salzburg on 2 August 1945. Georg Trakl is one of the most important voices in
+German literature and he was also born in Salzburg. Theodor Herzl worked in the
+courts in Salzburg during the year after he earned his law degree in 1884.[6]
\ No newline at end of file
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/example.txt
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/metadata.rdf.zip
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/metadata.rdf.zip?rev=1293245&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/stanbol/trunk/enhancer/ldpath/src/test/resources/metadata.rdf.zip
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: incubator/stanbol/trunk/enhancer/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/pom.xml?rev=1293245&r1=1293244&r2=1293245&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/pom.xml (original)
+++ incubator/stanbol/trunk/enhancer/pom.xml Fri Feb 24 13:32:19 2012
@@ -48,6 +48,7 @@
<module>generic/chainmanager</module>
<module>jobmanager</module>
<module>jersey</module>
+ <module>ldpath</module>
<module>benchmark</module>
<module>engines</module>
<module>chain/allactive</module>
Modified: incubator/stanbol/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/pom.xml?rev=1293245&r1=1293244&r2=1293245&view=diff
==============================================================================
--- incubator/stanbol/trunk/pom.xml (original)
+++ incubator/stanbol/trunk/pom.xml Fri Feb 24 13:32:19 2012
@@ -65,6 +65,7 @@
<module>enhancer/chain/weighted</module>
<module>enhancer/chain/list</module>
<module>enhancer/jersey</module>
+ <module>enhancer/ldpath</module>
<module>enhancer/benchmark</module>
<module>enhancer/bundlelist</module>