You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/11/25 13:09:16 UTC
svn commit: r1545240 - in /stanbol/trunk/enhancement-engines: ./
dereference/ dereference/core/ dereference/core/src/
dereference/core/src/license/ dereference/core/src/main/
dereference/core/src/main/java/ dereference/core/src/main/java/org/
dereferen...
Author: rwesten
Date: Mon Nov 25 12:09:15 2013
New Revision: 1545240
URL: http://svn.apache.org/r1545240
Log:
STANBOL-336, STANBOL-1222: EntityDereference core module (first commit)
Added:
stanbol/trunk/enhancement-engines/dereference/
stanbol/trunk/enhancement-engines/dereference/core/ (with props)
stanbol/trunk/enhancement-engines/dereference/core/pom.xml
stanbol/trunk/enhancement-engines/dereference/core/src/
stanbol/trunk/enhancement-engines/dereference/core/src/license/
stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties
stanbol/trunk/enhancement-engines/dereference/core/src/main/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
stanbol/trunk/enhancement-engines/dereference/core/src/main/resources/
stanbol/trunk/enhancement-engines/dereference/core/src/test/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/
stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
stanbol/trunk/enhancement-engines/dereference/core/src/test/resources/
Modified:
stanbol/trunk/enhancement-engines/pom.xml
Propchange: stanbol/trunk/enhancement-engines/dereference/core/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 25 12:09:15 2013
@@ -0,0 +1,7 @@
+target
+
+.project
+
+.settings
+
+.classpath
Added: stanbol/trunk/enhancement-engines/dereference/core/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/pom.xml?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/pom.xml (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/pom.xml Mon Nov 25 12:09:15 2013
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>apache-stanbol-enhancement-engines-entitylinking</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <artifactId>org.apache.stanbol.enhancer.engines.dereference.core</artifactId>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Enhancement Engine : Entity Dereference Core</name>
+ <description>
+ Implementation of an Entity Dereferencing Engine defining an Extension point
+ for actual dereferencing implementations.
+ </description>
+
+ <inceptionYear>2013</inceptionYear>
+
+ <scm>
+ <connection>
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/dereference/core/
+ </connection>
+ <developerConnection>
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancement-engines/dereference/core/
+ </developerConnection>
+ <url>http://stanbol.apache.org/</url>
+ </scm>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Import-Package>
+ org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.11,1.1)",
+ org.apache.stanbol.enhancer.engines.dereference; provide:=true,
+ *
+ </Import-Package>
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.dereference;version=${project.version}
+ </Export-Package>
+ <!-- Private-Package>
+ </Private-Package -->
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+
+ <!-- Testing -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency> <!-- used for debug level logging during tests -->
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+
+</project>
Added: stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/license/THIRD-PARTY.properties Mon Nov 25 12:09:15 2013
@@ -0,0 +1,24 @@
+# Generated by org.codehaus.mojo.license.AddThirdPartyMojo
+#-------------------------------------------------------------------------------
+# Already used licenses in project :
+# - Apache Software License
+# - Apache Software License, Version 2.0
+# - BSD License
+# - Common Development And Distribution License (CDDL), Version 1.0
+# - Common Development And Distribution License (CDDL), Version 1.1
+# - Common Public License, Version 1.0
+# - Eclipse Public License, Version 1.0
+# - GNU General Public License (GPL), Version 2 with classpath exception
+# - GNU Lesser General Public License (LGPL)
+# - GNU Lesser General Public License (LGPL), Version 2.1
+# - ICU License
+# - MIT License
+# - New BSD License
+# - Public Domain License
+#-------------------------------------------------------------------------------
+# Please fill the missing licenses for dependencies :
+#
+#
+#Mon Nov 25 13:04:19 CET 2013
+org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
+org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceException.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Checked exception indicating that an Entity could not be dereferenced.
+ * The message names the affected Entity and the original problem is kept
+ * as cause.
+ */
+public class DereferenceException extends Exception {
+
+    private static final long serialVersionUID = 1524436328783083428L;
+
+    /**
+     * Creates the exception for the parsed Entity.
+     * @param entity the Entity that could not be dereferenced (only used to
+     * build the message)
+     * @param t the cause
+     */
+    public DereferenceException(UriRef entity, Throwable t){
+        super(buildMessage(entity), t);
+    }
+
+    /**
+     * Builds the exception message for the parsed Entity.
+     */
+    private static String buildMessage(UriRef entity){
+        StringBuilder message = new StringBuilder("Unable to dereference Entity ");
+        message.append(entity);
+        message.append("!");
+        return message.toString();
+    }
+
+}
Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferenceEngine.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.servicesapi.ServiceProperties.ENHANCEMENT_ENGINE_ORDERING;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+
+import java.io.IOError;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.lang.StringUtils;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link EnhancementEngine} that looks up all Entities referenced by
+ * {@link Properties#ENHANCER_ENTITY_REFERENCE} triples in the metadata of a
+ * {@link ContentItem} and asks the configured {@link EntityDereferencer} to
+ * add their data to the metadata.
+ * <p>
+ * If the dereferencer provides a usable {@link ExecutorService} the Entities
+ * are dereferenced by jobs submitted to it. Otherwise they are dereferenced
+ * one after the other in the calling thread.
+ */
+public class EntityDereferenceEngine implements EnhancementEngine, ServiceProperties {
+
+    private static final Logger log = LoggerFactory.getLogger(EntityDereferenceEngine.class);
+
+    /**
+     * By default the EntityDereferenceEngine does use {@link ServiceProperties#ORDERING_POST_PROCESSING}
+     */
+    public static final int DEFAULT_ENGINE_ORDERING = ServiceProperties.ORDERING_POST_PROCESSING;
+
+    /**
+     * If <code>true</code> the offline mode is enforced when dereferencing
+     * Entities
+     */
+    private boolean offline;
+
+    protected final EntityDereferencer dereferencer;
+
+    protected final String name;
+
+    /**
+     * The Map holding the {@link #serviceProperties} for this engine.
+     */
+    protected final Map<String,Object> serviceProperties = new HashMap<String,Object>();
+    /**
+     * Unmodifiable view over {@link #serviceProperties} returned by
+     * {@link #getServiceProperties()}
+     */
+    private final Map<String,Object> unmodServiceProperties = Collections.unmodifiableMap(serviceProperties);
+
+    /**
+     * Creates an engine with the parsed name that uses the parsed dereferencer.
+     * The engine ordering is initialised with {@link #DEFAULT_ENGINE_ORDERING}.
+     * @param name the name of the engine. MUST NOT be <code>null</code> nor blank
+     * @param dereferencer the dereferencer. MUST NOT be <code>null</code>
+     * @throws IllegalArgumentException if one of the parameters is invalid
+     */
+    public EntityDereferenceEngine(String name, EntityDereferencer dereferencer){
+        if(StringUtils.isBlank(name)){
+            throw new IllegalArgumentException("The parsed EnhancementEngine name MUST NOT be NULL nor empty!");
+        }
+        this.name = name;
+        if(dereferencer == null){
+            throw new IllegalArgumentException("The parsed EntityDereferencer MUST NOT be NULL!");
+        }
+        this.dereferencer = dereferencer;
+        //apply the documented default so that the service properties are
+        //correct even if #setEngineOrdering(Integer) is never called
+        serviceProperties.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+            Integer.valueOf(DEFAULT_ENGINE_ORDERING));
+    }
+
+    /**
+     * Setter for the offline mode. This method is typically called if
+     * {@link OfflineMode} is injected to the component registering an instance
+     * of this Engine implementation
+     * @param mode the offline mode
+     */
+    public void setOfflineMode(boolean mode){
+        this.offline = mode;
+    }
+
+    /**
+     * Getter for the offline mode
+     * @return <code>true</code> if the offline mode is active
+     */
+    public boolean isOfflineMode(){
+        return offline;
+    }
+    /**
+     * Setter for the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING
+     * engine ordering}.
+     * @param ordering The ordering or <code>null</code> to set the
+     * {@link #DEFAULT_ENGINE_ORDERING default} for this engine.
+     */
+    public void setEngineOrdering(Integer ordering){
+        serviceProperties.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
+            ordering == null ? DEFAULT_ENGINE_ORDERING : ordering);
+    }
+
+    /**
+     * Getter for the {@link ServiceProperties#ENHANCEMENT_ENGINE_ORDERING
+     * engine ordering} stored in the service properties
+     * @return the engine ordering
+     */
+    public Integer getEngineOrdering(){
+        return (Integer)serviceProperties.get(ENHANCEMENT_ENGINE_ORDERING);
+    }
+
+    /**
+     * @return an unmodifiable view over the service properties of this engine
+     */
+    @Override
+    public Map<String,Object> getServiceProperties() {
+        return unmodServiceProperties;
+    }
+
+    /**
+     * @return {@link EnhancementEngine#CANNOT_ENHANCE} if the offline mode is
+     * active and the dereferencer does not support it. Otherwise
+     * {@link EnhancementEngine#ENHANCE_ASYNC}
+     */
+    @Override
+    public int canEnhance(ContentItem ci) throws EngineException {
+        if(offline && !dereferencer.supportsOfflineMode()){
+            return CANNOT_ENHANCE;
+        } else {
+            return ENHANCE_ASYNC;
+        }
+    }
+
+    /**
+     * Dereferences all Entities referenced by the metadata of the parsed
+     * ContentItem. Entities the dereferencer fails on with a
+     * {@link DereferenceException} are logged and skipped.
+     * @param ci the ContentItem
+     * @throws EngineException if the thread is interrupted while waiting for
+     * dereference jobs or if a job fails with an error other than a
+     * {@link DereferenceException}
+     */
+    @Override
+    public void computeEnhancements(ContentItem ci) throws EngineException {
+        if(offline && !dereferencer.supportsOfflineMode()){
+            //entity dereferencer does no longer support offline mode
+            return;
+        }
+        log.debug("> dereference Entities for ContentItem {}", ci.getUri());
+        final MGraph metadata = ci.getMetadata();
+        //(1) read all Entities we need to dereference from the parsed contentItem
+        Set<UriRef> referencedEntities = collectReferencedEntities(ci, metadata);
+        final Lock writeLock = ci.getLock().writeLock();
+        log.trace(" - scheduled {} Entities for dereferencing", referencedEntities.size());
+        //(2) dereference the Entities
+        ExecutorService executor = dereferencer.getExecutor();
+        long start = System.currentTimeMillis();
+        Set<UriRef> failedEntities = new HashSet<UriRef>();
+        int dereferencedCount = 0;
+        if(executor != null && !executor.isShutdown()){ //dereference using executor
+            List<DereferenceJob> dereferenceJobs = new ArrayList<DereferenceJob>(referencedEntities.size());
+            boolean completed = false;
+            try {
+                //schedule all entities to dereference
+                for(final UriRef entity : referencedEntities){
+                    DereferenceJob dereferenceJob = new DereferenceJob(entity, metadata, writeLock);
+                    dereferenceJob.setFuture(executor.submit(dereferenceJob));
+                    dereferenceJobs.add(dereferenceJob);
+                }
+                //wait for all entities to be dereferenced
+                for(DereferenceJob dereferenceJob : dereferenceJobs){
+                    try {
+                        if(dereferenceJob.await()){
+                            dereferencedCount++;
+                        }
+                    } catch (InterruptedException e) {
+                        // Restore the interrupted status
+                        Thread.currentThread().interrupt();
+                        throw new EngineException(this, ci,
+                            "Interupted while waiting for dereferencing Entities", e);
+                    } catch (ExecutionException e) {
+                        if(e.getCause() instanceof DereferenceException){
+                            failedEntities.add(dereferenceJob.entity);
+                            log.debug(" ... error while dereferencing "
+                                + dereferenceJob.entity + "!", e);
+                        } else { //unknown error
+                            throw new EngineException(this,ci, "Unchecked Error while "
+                                + "dereferencing Entity " + dereferenceJob.entity +"!", e);
+                        }
+                    }
+                }
+                completed = true;
+            } finally {
+                if(!completed){
+                    //we are about to fail: do not leave jobs in the queue that
+                    //would write to the metadata after this method has returned
+                    for(DereferenceJob dereferenceJob : dereferenceJobs){
+                        dereferenceJob.cancel();
+                    }
+                }
+            }
+        } else { //dereference using the current thread
+            for(UriRef entity : referencedEntities){
+                try {
+                    log.trace(" ... dereference {}", entity);
+                    if(dereferencer.dereference(entity, metadata, offline, writeLock)){
+                        dereferencedCount++;
+                        log.trace(" + success");
+                    } else {
+                        log.trace(" - not found");
+                    }
+                } catch (DereferenceException e) {
+                    log.debug(" ... error while dereferencing " + entity + "!", e);
+                    failedEntities.add(entity);
+                }
+            }
+        }
+        long duration = System.currentTimeMillis() - start;
+        if(!failedEntities.isEmpty()){
+            log.warn(" - unable to dereference {} of {} for ContentItem {}",
+                new Object[] {failedEntities.size(),referencedEntities.size(),
+                    ci.getUri()});
+        }
+        if(log.isDebugEnabled()){
+            //guard against an integer division by zero if nothing was dereferenced
+            float durationPerEntity = dereferencedCount == 0 ? 0f :
+                (duration*100/dereferencedCount)/100.0f;
+            log.debug(" - dereferenced {} of {} Entities in {}ms ({}ms/dereferenced)",
+                new Object[]{dereferencedCount, referencedEntities.size(),
+                    duration, durationPerEntity});
+        }
+
+    }
+
+    /**
+     * Collects the {@link UriRef} values of all
+     * {@link Properties#ENHANCER_ENTITY_REFERENCE} triples while holding the
+     * read lock of the parsed ContentItem. Values that are not UriRefs are
+     * logged on warn level and ignored.
+     */
+    private Set<UriRef> collectReferencedEntities(ContentItem ci, MGraph metadata){
+        Set<UriRef> referencedEntities = new HashSet<UriRef>();
+        ci.getLock().readLock().lock();
+        try {
+            Iterator<Triple> entityReferences = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
+            while(entityReferences.hasNext()){
+                Triple triple = entityReferences.next();
+                Resource entityReference = triple.getObject();
+                if(entityReference instanceof UriRef){
+                    boolean added = referencedEntities.add((UriRef)entityReference);
+                    if(added && log.isTraceEnabled()){
+                        log.trace(" ... schedule Entity {}", entityReference);
+                    }
+                } else if(log.isWarnEnabled()){
+                    //log enhancements that use a fise:entity-reference with a non UriRef value!
+                    NonLiteral enhancement = triple.getSubject();
+                    log.warn("Can not dereference invalid Enhancement {}",enhancement);
+                    for(Iterator<Triple> it = metadata.filter(enhancement, null, null);it.hasNext();){
+                        log.warn(" {}", it.next());
+                    }
+                }
+            }
+        } finally {
+            ci.getLock().readLock().unlock();
+        }
+        return referencedEntities;
+    }
+
+    @Override
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Used both as {@link Callable} submitted to the {@link ExecutorService}
+     * and as object to {@link #await()} the completion of the task.
+     * @author Rupert Westenthaler
+     *
+     */
+    class DereferenceJob implements Callable<Boolean> {
+
+        final UriRef entity;
+        final MGraph metadata;
+        final Lock writeLock;
+
+        private Future<Boolean> future;
+
+        DereferenceJob(UriRef entity, MGraph metadata, Lock writeLock){
+            this.entity = entity;
+            this.metadata = metadata;
+            this.writeLock = writeLock;
+        }
+
+        /**
+         * Dereferences the {@link #entity} by calling the dereferencer of the
+         * enclosing engine.
+         * @return the value returned by the dereferencer
+         */
+        @Override
+        public Boolean call() throws DereferenceException {
+            log.trace(" ... dereference {}", entity);
+            boolean state = dereferencer.dereference(entity, metadata, offline, writeLock);
+            if(state){
+                log.trace(" + success");
+            } else {
+                log.trace(" - not found");
+            }
+            return state;
+        }
+
+        /**
+         * Sets the future returned when this job was submitted. Needs to be
+         * called before {@link #await()}.
+         */
+        void setFuture(Future<Boolean> future){
+            this.future = future;
+        }
+
+        /**
+         * Blocks until the job has completed
+         * @return the result of {@link #call()}
+         */
+        public boolean await() throws InterruptedException, ExecutionException {
+            return future.get();
+        }
+
+        /**
+         * Cancels the job if it has not yet started. A job that is already
+         * running is not interrupted, so that a dereferencer holding the
+         * write lock can finish normally.
+         */
+        void cancel(){
+            if(future != null){
+                future.cancel(false);
+            }
+        }
+    }
+
+}
Added: stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/main/java/org/apache/stanbol/enhancer/engines/dereference/EntityDereferencer.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import java.util.ConcurrentModificationException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * Interface used by the {@link EntityDereferenceEngine} to dereference
+ * Entities
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public interface EntityDereferencer {
+
+ /**
+ * If this EntityDereferencer can dereference Entities when in
+ * {@link OfflineMode}. This method is expected to only return <code>false</code>
+ * when an implementation can not dereference any Entities when in offline
+ * mode. If some (e.g. locally cached) Entities can be dereferenced
+ * the dereferencer should return <code>true</code> and just ignore calls
+ * for Entities that are not locally available.
+ * @return <code>true</code> if this dereferencer can be used in
+ * {@link OfflineMode}
+ */
+ boolean supportsOfflineMode();
+
+ /**
+ * EntityDereferencer can optionally provide an ExecutorService used to
+ * dereference Entities.
+ * @return the {@link ExecutorService} or <code>null</code> if not used
+ * by this implementation
+ */
+ ExecutorService getExecutor();
+
+ /**
+ * Dereferences the Entity with the parsed {@link UriRef} by copying the
+ * data to the parsed graph
+ * @param entity the uri of the Entity to dereference
+ * @param graph the graph to add the dereferenced entity
+ * @param offlineMode <code>true</code> if {@link OfflineMode} is active.
+ * Otherwise <code>false</code>
+ * @param writeLock The writeLock for the graph. Dereferencers MUST acquire
+ * a <code>{@link Lock#lock() writeLock#lock()}</code> before adding
+ * dereferenced data to the parsed graph. This is essential for using multiple
+ * threads to dereference Entities. Failing to do so will cause
+ * {@link ConcurrentModificationException}s in this implementation or
+ * other components (typically other {@link EnhancementEngine}s) accessing the
+ * same graph.
+ * @return if the entity was dereferenced
+ * @throws DereferenceException on any error while dereferencing the
+ * requested Entity
+ */
+ boolean dereference(UriRef entity, MGraph graph, boolean offlineMode,
+ Lock writeLock) throws DereferenceException;
+
+}
Added: stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java?rev=1545240&view=auto
==============================================================================
--- stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java (added)
+++ stanbol/trunk/enhancement-engines/dereference/core/src/test/java/org/apache/stanbol/enhancer/engines/dereference/DereferenceEngineTest.java Mon Nov 25 12:09:15 2013
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.dereference;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.SKOS_CONCEPT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDFS_LABEL;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Random;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.locks.Lock;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.TripleCollection;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.commons.stanboltools.offline.OfflineMode;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tests the {@link EntityDereferenceEngine} with an in-memory
+ * {@link EntityDereferencer} that serves randomly generated test Entities,
+ * once dereferencing in the calling thread and once using an
+ * {@link ExecutorService}.
+ * <p>
+ * TODO: convert this to an integration test!
+ * @author Rupert Westenthaler
+ */
+public class DereferenceEngineTest {
+
+    private final static Logger log = LoggerFactory.getLogger(DereferenceEngineTest.class);
+
+    //TODO: test implementations of EntityDereferencer
+    static EntityDereferencer asyncDereferencer = new TestDereferencer(Executors.newFixedThreadPool(4));
+    static EntityDereferencer syncDereferencer = new TestDereferencer(null);
+
+    /**
+     * The Entities that can be dereferenced by the {@link TestDereferencer}
+     */
+    private static TripleCollection testData;
+
+    /**
+     * The entity references copied to the metadata of the ContentItems
+     * created by {@link #getContentItem(String)}
+     */
+    private static TripleCollection testMetadata;
+
+    public static final UriRef NAME = new UriRef(NamespaceEnum.rdfs+"label");
+    public static final UriRef TYPE = new UriRef(NamespaceEnum.rdf+"type");
+    public static final UriRef REDIRECT = new UriRef(NamespaceEnum.rdfs+"seeAlso");
+
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+    private static final UriRef SKOS_NOTATION = new UriRef(NamespaceEnum.skos+"notation");
+    private static final Language LANG_EN = new Language("en");
+    private static final Language LANG_DE = new Language("de");
+
+    private static final int NUM_ENTITIES = 1000;
+
+    public static final float PERCENTAGE_LINKED = 0.3f;
+    public static final float PERCENTAGE_PRESENT = 0.9f;
+
+    /**
+     * Creates the test Entities and the entity references. The seed of the
+     * random generator is logged so that a failing run can be reproduced.
+     */
+    @BeforeClass
+    public static void setUpServices() throws IOException {
+        testData = new IndexedMGraph();
+        long seed = System.currentTimeMillis();
+        log.info("Test seed {}", seed);
+        Random random = new Random(seed);
+        int numEntities = 0;
+        for(int i = 0; i < NUM_ENTITIES ; i++){
+            if(random.nextFloat() <= PERCENTAGE_PRESENT){ //do not create all entities
+                UriRef uri = new UriRef("urn:test:entity"+i);
+                testData.add(new TripleImpl(uri, RDF_TYPE, SKOS_CONCEPT));
+                testData.add(new TripleImpl(uri, RDFS_LABEL,
+                    new PlainLiteralImpl("entity "+i, LANG_EN)));
+                testData.add(new TripleImpl(uri, RDFS_LABEL,
+                    new PlainLiteralImpl("Entity "+i, LANG_DE)));
+                testData.add(new TripleImpl(uri, SKOS_NOTATION,
+                    lf.createTypedLiteral(i)));
+                numEntities++;
+            }
+        }
+        log.info(" ... created {} Entities",numEntities);
+        testMetadata = new IndexedMGraph();
+        int numLinks = 0;
+        for(int i = 0; i < NUM_ENTITIES ; i++){
+            if(random.nextFloat() < PERCENTAGE_LINKED){
+                UriRef enhancementUri = new UriRef("urn:test:enhancement"+i);
+                UriRef entityUri = new UriRef("urn:test:entity"+i);
+                //we do not need any other triple for testing in the contentItem
+                testMetadata.add(new TripleImpl(enhancementUri, ENHANCER_ENTITY_REFERENCE, entityUri));
+                numLinks++;
+            }
+        }
+        log.info(" ... created {} Entity references ", numLinks);
+
+    }
+
+    /**
+     * Shuts down the thread pool used by the {@link #asyncDereferencer} so
+     * that its worker threads do not outlive the test class.
+     */
+    @AfterClass
+    public static void shutdownExecutor() {
+        ExecutorService executor = asyncDereferencer.getExecutor();
+        if(executor != null){
+            executor.shutdown();
+        }
+    }
+
+    /**
+     * Creates a ContentItem with the parsed id and adds the
+     * {@link #testMetadata} to its metadata
+     */
+    public static ContentItem getContentItem(final String id) throws IOException {
+        ContentItem ci = ciFactory.createContentItem(new UriRef(id), new StringSource("Not used"));
+        ci.getMetadata().addAll(testMetadata);
+        return ci;
+    }
+    /**
+     * Test {@link OfflineMode} functionality
+     * @throws Exception
+     */
+    @Test
+    public void testOfflineMode() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testOfflineMode");
+        EntityDereferencer onlineDereferencer = new TestDereferencer(null){
+            @Override
+            public boolean supportsOfflineMode() {
+                return false;
+            }
+        };
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("online", onlineDereferencer);
+        //engine in online mode
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        //set engine in offline mode
+        engine.setOfflineMode(true);
+        Assert.assertEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+    }
+
+    @Test
+    public void testSyncDereferencing() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testSyncDereferencing");
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("sync", syncDereferencer);
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        engine.computeEnhancements(ci);
+        validateDereferencedEntities(ci.getMetadata());
+    }
+
+    @Test
+    public void testAsyncDereferencing() throws Exception {
+        ContentItem ci = getContentItem("urn:test:testAsyncDereferencing");
+        EntityDereferenceEngine engine = new EntityDereferenceEngine("async", asyncDereferencer);
+        Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
+        engine.computeEnhancements(ci);
+        validateDereferencedEntities(ci.getMetadata());
+    }
+
+    /**
+     * Asserts that the parsed metadata contain all triples of the referenced
+     * test Entities and no triple of any other test Entity
+     */
+    private void validateDereferencedEntities(TripleCollection metadata) {
+        Iterator<Triple> referenced = metadata.filter(null, ENHANCER_ENTITY_REFERENCE, null);
+        MGraph expected = new IndexedMGraph();
+        while(referenced.hasNext()){
+            UriRef entity = (UriRef)referenced.next().getObject();
+            Iterator<Triple> entityTriples = testData.filter(entity, null, null);
+            while(entityTriples.hasNext()){
+                expected.add(entityTriples.next());
+            }
+        }
+        MGraph notExpected = new IndexedMGraph(testData);
+        notExpected.removeAll(expected);
+        Assert.assertTrue(metadata.containsAll(expected));
+        Assert.assertTrue(Collections.disjoint(metadata, notExpected));
+    }
+
+    /**
+     * {@link EntityDereferencer} that copies the triples of the requested
+     * Entity from the {@link DereferenceEngineTest#testData} to the parsed
+     * graph while holding the parsed write lock.
+     */
+    private static class TestDereferencer implements EntityDereferencer {
+
+        private final ExecutorService executorService;
+
+        public TestDereferencer(ExecutorService executorService) {
+            this.executorService = executorService;
+        }
+
+        @Override
+        public boolean supportsOfflineMode() {
+            return true;
+        }
+
+        @Override
+        public ExecutorService getExecutor() {
+            return executorService;
+        }
+
+        @Override
+        public boolean dereference(UriRef entity, MGraph graph, boolean offlineMode, Lock writeLock) throws DereferenceException {
+            Iterator<Triple> entityTriples = testData.filter(entity, null, null);
+            if(entityTriples.hasNext()){
+                writeLock.lock();
+                try {
+                    do {
+                        graph.add(entityTriples.next());
+                    } while (entityTriples.hasNext());
+                } finally {
+                    writeLock.unlock();
+                }
+                return true;
+            } else {
+                return false;
+            }
+        }
+
+    }
+
+}
Modified: stanbol/trunk/enhancement-engines/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/pom.xml?rev=1545240&r1=1545239&r2=1545240&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/pom.xml (original)
+++ stanbol/trunk/enhancement-engines/pom.xml Mon Nov 25 12:09:15 2013
@@ -102,7 +102,10 @@
<!-- converts TextAnnotations to the STANBOL-987 model -->
<module>textannotationnewmodel</module>
<!-- finds co-mentions of Entities earlier mentioned on the Text (STANBOL-1070) -->
- <module>entitycomention</module>
+ <module>entitycomention</module>
+
+ <!-- Entity Dereference (STANBOL-336) -->
+ <module>dereference/core</module>
<!-- Enhancement Engines using external services -->
<module>celi</module> <!-- http://linguagrid.org -->