You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/11/26 12:39:38 UTC
svn commit: r1413560 [2/3] - in /stanbol/trunk: data/ data/defaultconfig/
data/defaultconfig/src/main/resources/
data/defaultconfig/src/main/resources/config/ data/opennlp/lang/de/
data/sentiment/ data/sentiment/sentiwordnet/ data/sentiment/sentiwordne...
Modified: stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml (original)
+++ stanbol/trunk/enhancer/bundlelist/src/main/bundles/list.xml Mon Nov 26 11:39:25 2012
@@ -51,6 +51,11 @@
<artifactId>org.apache.stanbol.enhancer.core</artifactId>
<version>0.10.0-SNAPSHOT</version>
</bundle>
+ <bundle> <!-- NLP processing (STANBOL-733) -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
</startLevel>
<!-- LDPath -->
<startLevel level="30">
@@ -146,25 +151,74 @@
<artifactId>org.apache.stanbol.enhancer.engines.tika</artifactId>
<version>0.10.0-SNAPSHOT</version>
</bundle>
-
- <!-- Named Entity Recoqunition (NER)-->
-
+
+ <!-- NLP processing engines (all STANBOL-733 and sub-tasks) -->
+
+ <bundle><!-- sentence detection with OpenNLP -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.opennlp.sentence</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
+ <bundle><!-- OpenNLP based tokenizing of Texts -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.opennlp.token</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
+ <bundle><!-- POS tagging with OpenNLP -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.opennlp.pos</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
+ <bundle><!-- Chunking tagging with OpenNLP -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.opennlp.chunker</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
<bundle> <!-- Open NLP based NER -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.opennlp.ner</artifactId>
<version>0.10.0-SNAPSHOT</version>
</bundle>
+ <!-- NLP metadata to RDF (using NIF 1.0) - NOT YET READY FOR DEFAULT CONFIG
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.nlp2rdf</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle> -->
+ <!-- Sentiment Enhancement Engines -->
+ <bundle><!-- Sentiment Word Classifiers -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.sentiment.wordclassifier</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
+ <!-- NOT YET READY FOR DEFAULT CONFIG
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.sentiment.summarization</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle> -->
+
<!-- Entity Extraction/Linking -->
<bundle><!-- NER linking (depends on the Entityhub) -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engine.entitytagging</artifactId>
<version>0.10.0-SNAPSHOT</version>
</bundle>
- <bundle><!-- Keyword Extraction from Text (depends on the Entityhub) -->
+ <bundle><!-- Keyword Extraction from Text DEPRECATED! (depends on the Entityhub) -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engine.keywordextraction</artifactId>
<version>0.10.0-SNAPSHOT</version>
</bundle>
+ <bundle><!-- EntityLinking based on the Entityhub -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.entitylinking</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
+ <bundle><!-- EntityLinking for the Stanbol Entityhub -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.entityhublinking</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </bundle>
<!-- Refactor Enhancement Engine -->
Propchange: stanbol/trunk/enhancer/engines/celi/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon Nov 26 11:39:25 2012
@@ -0,0 +1,4 @@
+/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/celi:1374978-1386535
+/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi:1386989-1388016
+/incubator/stanbol/trunk/enhancer/engines/celi:1339554,1339557-1339558
+/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi:1388017-1413353
Modified: stanbol/trunk/enhancer/engines/celi/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/celi/pom.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/celi/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/celi/pom.xml Mon Nov 26 11:39:25 2012
@@ -1,153 +1,154 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
- license agreements. See the NOTICE file distributed with this work for additional
- information regarding copyright ownership. The ASF licenses this file to
- You under the Apache License, Version 2.0 (the "License"); you may not use
- this file except in compliance with the License. You may obtain a copy of
- the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
- by applicable law or agreed to in writing, software distributed under the
- License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
- OF ANY KIND, either express or implied. See the License for the specific
- language governing permissions and limitations under the License. -->
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
- <version>0.10.0-SNAPSHOT</version>
- <relativePath>../../parent</relativePath>
- </parent>
-
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
- <packaging>bundle</packaging>
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
+ <packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine : CELI Engine</name>
<description>Enhancement Engine using the CELI service.</description>
<inceptionYear>2012</inceptionYear>
- <dependencies>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-SNAPSHOT</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
- <version>0.9.0-incubating</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.core</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.felix</groupId>
- <artifactId>org.apache.felix.scr.annotations</artifactId>
- <scope>provided</scope>
- </dependency>
-
- <!-- generic tax -->
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.httpcomponents</groupId>
- <artifactId>httpclient-osgi</artifactId>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
- <version>0.9.0-incubating</version>
- <scope>provided</scope>
- </dependency>
-
- <!-- test -->
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.test</artifactId>
- <version>0.10.0-SNAPSHOT</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.stanbol</groupId>
- <artifactId>org.apache.stanbol.enhancer.core</artifactId>
- <version>0.10.0-SNAPSHOT</version>
- <scope>test</scope>
- </dependency>
- <dependency><!-- for debugging enhancements -->
- <groupId>org.apache.clerezza</groupId>
- <artifactId>rdf.jena.serializer</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency> <!-- we use log4j 1.2 -->
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
-
-
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <skipTests>false</skipTests>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-bundle-plugin</artifactId>
- <extensions>true</extensions>
- <configuration>
- <instructions>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </dependency>
+ <dependency> <!-- STANBOL-739: adapt Lemmatizer Engine to use AnalyzedText -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
+ <version>0.9.0-incubating</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>org.apache.felix.scr.annotations</artifactId>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- generic tax -->
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient-osgi</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+ <version>0.9.0-incubating</version>
+ <scope>provided</scope>
+ </dependency>
+
+ <!-- test -->
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency><!-- for debugging enhancements -->
+ <groupId>org.apache.clerezza</groupId>
+ <artifactId>rdf.jena.serializer</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency> <!-- we use log4j 1.2 -->
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <skipTests>false</skipTests>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
<Import-Package>
org.apache.stanbol.enhancer.servicesapi; provide:=true,
org.apache.stanbol.enhancer.servicesapi.impl; provide:=true,
*
</Import-Package>
- <Export-Package>
- org.apache.stanbol.enhancer.engines.celi
- </Export-Package>
- <Private-Package>
- org.apache.stanbol.enhancer.engines.celi.ner.impl,
- org.apache.stanbol.enhancer.engines.celi.langid.impl,
- org.apache.stanbol.enhancer.engines.celi.classification.impl,
- org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl,
+ <Export-Package>
+ org.apache.stanbol.enhancer.engines.celi
+ </Export-Package>
+ <Private-Package>
+ org.apache.stanbol.enhancer.engines.celi.ner.impl,
+ org.apache.stanbol.enhancer.engines.celi.langid.impl,
+ org.apache.stanbol.enhancer.engines.celi.classification.impl,
+ org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl,
org.apache.stanbol.enhancer.engines.celi.utils
- </Private-Package>
- </instructions>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
- <artifactId>maven-scr-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
-
+ </Private-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-scr-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
\ No newline at end of file
Modified: stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (original)
+++ stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Mon Nov 26 11:39:25 2012
@@ -50,7 +50,18 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.engines.celi.CeliConstants;
+import org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures;
+import org.apache.stanbol.enhancer.engines.celi.CeliTagSetRegistry;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.morpho.Case;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.morpho.Person;
+import org.apache.stanbol.enhancer.nlp.morpho.Tense;
+import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
+import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -67,22 +78,16 @@ import org.slf4j.LoggerFactory;
@Component(immediate = true, metatype = true)
@Service
-@Properties(value = {
- @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLemmatizer"),
- @Property(name = CeliConstants.CELI_LICENSE),
- @Property(name = CeliConstants.CELI_TEST_ACCOUNT,boolValue=false)
-})
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLemmatizer"), @Property(name = CeliConstants.CELI_LICENSE), @Property(name = CeliConstants.CELI_TEST_ACCOUNT, boolValue = false) })
public class CeliLemmatizerEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
-
+ // TODO: check if it is OK to define new properties in the FISE namespace
+ public static final UriRef hasLemmaForm = new UriRef("http://fise.iks-project.eu/ontology/hasLemmaForm");
+
/**
- * This ensures that no connections to external services are made if Stanbol is started in offline mode
- * as the OnlineMode service will only be available if OfflineMode is deactivated.
+ * This ensures that no connections to external services are made if Stanbol is started in offline mode as the OnlineMode service will only be available if OfflineMode is deactivated.
*/
@Reference
- private OnlineMode onlineMode;
- //TODO: check if it is OK to define new properties in the FISE namespace
- public static final UriRef hasLemmaForm = new UriRef("http://fise.iks-project.eu/ontology/hasLemmaForm");
- public static final UriRef hasMorphoFeature = new UriRef("http://fise.iks-project.eu/ontology/hasMorphologicalFeature");
+ private OnlineMode onlineMode;
private static List<String> supportedLangs = new Vector<String>();
static {
@@ -99,17 +104,14 @@ public class CeliLemmatizerEnhancementEn
public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
+ * The default value for the Execution of this Engine. Currently set to {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
*/
public static final Integer defaultOrder = ServiceProperties.ORDERING_CONTENT_EXTRACTION;
private Logger log = LoggerFactory.getLogger(getClass());
-
/**
- * This contains the only MIME type directly supported by this enhancement
- * engine.
+ * This contains the only MIME type directly supported by this enhancement engine.
*/
private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
@@ -135,7 +137,7 @@ public class CeliLemmatizerEnhancementEn
protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
super.activate(ctx);
Dictionary<String, Object> properties = ctx.getProperties();
- this.licenseKey = Utils.getLicenseKey(properties,ctx.getBundleContext());
+ this.licenseKey = Utils.getLicenseKey(properties, ctx.getBundleContext());
String url = (String) properties.get(SERVICE_URL);
if (url == null || url.isEmpty()) {
throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by" + "using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
@@ -159,10 +161,8 @@ public class CeliLemmatizerEnhancementEn
@Override
public int canEnhance(ContentItem ci) throws EngineException {
String language = EnhancementEngineHelper.getLanguage(ci);
- if(language==null) {
- log.warn("Unable to enhance ContentItem {} because language of the Content is unknown." +
- "Please check that a language identification engine is active in this EnhancementChain).",
- ci.getUri());
+ if (language == null) {
+ log.warn("Unable to enhance ContentItem {} because language of the Content is unknown." + "Please check that a language identification engine is active in this EnhancementChain).", ci.getUri());
}
if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null && this.isLangSupported(language))
return ENHANCE_ASYNC;
@@ -172,19 +172,15 @@ public class CeliLemmatizerEnhancementEn
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
- String language = EnhancementEngineHelper.getLanguage(ci);
- if (!isLangSupported(language)){
- throw new IllegalStateException("Call to computeEnhancement with unsupported language '"
- +language+" for ContentItem "+ ci.getUri() +": This is also checked "
- + "in the canEnhance method! -> This indicated an Bug in the "
- + "implementation of the " + "EnhancementJobManager!");
+ String language = EnhancementEngineHelper.getLanguage(ci);
+ if (!isLangSupported(language)) {
+ throw new IllegalStateException("Call to computeEnhancement with unsupported language '" + language + " for ContentItem " + ci.getUri() + ": This is also checked " + "in the canEnhance method! -> This indicated an Bug in the "
+ + "implementation of the " + "EnhancementJobManager!");
}
- Language lang = new Language(language); //clerezza language for PlainLiterals
+
Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
- throw new IllegalStateException("No ContentPart with Mimetype '"
- + TEXT_PLAIN_MIMETYPE + "' found for ContentItem "
- + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+ throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text;
@@ -198,70 +194,80 @@ public class CeliLemmatizerEnhancementEn
return;
}
- MGraph g = ci.getMetadata();
- LiteralFactory literalFactory = LiteralFactory.getInstance();
+ MGraph graph = ci.getMetadata();
if (this.completeMorphoAnalysis) {
- List<LexicalEntry> terms;
- try {
- terms = this.client.performMorfologicalAnalysis(text, language);
- } catch (IOException e) {
- throw new EngineException("Error while calling the CELI Lemmatizer"
- +" service (configured URL: "
- +serviceURL+")!",e);
- } catch (SOAPException e) {
- throw new EngineException("Error wile encoding/decoding the request/"
- +"response to the CELI lemmatizer service!",e);
- }
- //get a write lock before writing the enhancements
- ci.getLock().writeLock().lock();
- try {
- for (LexicalEntry le : terms) {
- if(!le.termReadings.isEmpty()){
- UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
- g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
- new PlainLiteralImpl(le.getWordForm(),lang)));
- if (le.from >= 0 && le.to > 0) {
- g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(le.from)));
- g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(le.to)));
- g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
- new PlainLiteralImpl(getSelectionContext(text, le.getWordForm(), le.from), lang)));
- }
- for (Reading r : le.termReadings) {
- g.add(new TripleImpl(textAnnotation, hasLemmaForm,
- new PlainLiteralImpl(r.getLemma(),lang)));
- for (Entry<String,String> entry : r.lexicalFeatures.entrySet()) {
- g.add(new TripleImpl(textAnnotation, hasMorphoFeature,
- literalFactory.createTypedLiteral(entry.getKey() + "=" + entry.getValue())));
- }
- }
- } //TODO: check if it is OK to ignore lexical entries with no readings
- }
- } finally {
- ci.getLock().writeLock().unlock();
- }
+ this.addMorphoAnalysisEnhancement(ci, text, language, graph);
} else {
- String lemmatizedContents;
- try {
- lemmatizedContents = this.client.lemmatizeContents(text, language);
- } catch (IOException e) {
- throw new EngineException("Error while calling the CELI Lemmatizer"
- +" service (configured URL: "
- +serviceURL+")!",e);
- } catch (SOAPException e) {
- throw new EngineException("Error wile encoding/decoding the request/"
- +"response to the CELI lemmatizer service!",e);
- }
- //get a write lock before writing the enhancements
- ci.getLock().writeLock().lock();
- try {
- UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
- g.add(new TripleImpl(textEnhancement, hasLemmaForm,
- new PlainLiteralImpl(lemmatizedContents,lang)));
- } finally {
- ci.getLock().writeLock().unlock();
- }
+ this.addLemmatizationEnhancement(ci, text, language, graph);
+ }
+ }
+
+ private void addMorphoAnalysisEnhancement(ContentItem ci, String text, String language, MGraph g) throws EngineException {
+ Language lang = new Language(language); // clerezza language for PlainLiterals
+ List<LexicalEntry> terms;
+ try {
+ terms = this.client.performMorfologicalAnalysis(text, language);
+ } catch (IOException e) {
+ throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
+ } catch (SOAPException e) {
+ throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
+ }
+ // get a write lock before writing the enhancements
+ ci.getLock().writeLock().lock();
+ try {
+ LiteralFactory literalFactory = LiteralFactory.getInstance();
+ for (LexicalEntry le : terms) {
+
+ List<CeliMorphoFeatures> mFeatures = this.convertLexicalEntryToMorphFeatures(le, language);
+ for (CeliMorphoFeatures feat : mFeatures) {
+ // Create a text annotation for each interpretation produced by the morphological analyzer
+ UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+ g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(le.getWordForm(), lang)));
+ if (le.from >= 0 && le.to > 0) {
+ g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(le.from)));
+ g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(le.to)));
+ g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, le.getWordForm(), le.from), lang)));
+ }
+ g.addAll(feat.featuresAsTriples(textAnnotation, lang));
+ }
+ }
+ } finally {
+ ci.getLock().writeLock().unlock();
+ }
+ }
+
+ private void addLemmatizationEnhancement(ContentItem ci, String text, String language, MGraph g) throws EngineException {
+ Language lang = new Language(language); // clerezza language for PlainLiterals
+ String lemmatizedContents;
+ try {
+ lemmatizedContents = this.client.lemmatizeContents(text, language);
+ } catch (IOException e) {
+ throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
+ } catch (SOAPException e) {
+ throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
+ }
+ // get a write lock before writing the enhancements
+ ci.getLock().writeLock().lock();
+ try {
+ UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
+ g.add(new TripleImpl(textEnhancement, CeliLemmatizerEnhancementEngine.hasLemmaForm, new PlainLiteralImpl(lemmatizedContents, lang)));
+ } finally {
+ ci.getLock().writeLock().unlock();
+ }
+ }
+
+ private List<CeliMorphoFeatures> convertLexicalEntryToMorphFeatures(LexicalEntry le, String lang) {
+ List<CeliMorphoFeatures> result = new Vector<CeliMorphoFeatures>();
+ if (!le.termReadings.isEmpty()) {
+ for (Reading r : le.termReadings) {
+ CeliMorphoFeatures morphoFeature = CeliMorphoFeatures.parseFrom(r, lang);
+ if(morphoFeature != null){
+ result.add(morphoFeature);
+ }
+ }
}
+ return result;
}
private boolean isLangSupported(String language) {
Modified: stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java (original)
+++ stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java Mon Nov 26 11:39:25 2012
@@ -17,6 +17,7 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
import java.io.BufferedWriter;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
@@ -39,6 +40,7 @@ import javax.xml.soap.SOAPPart;
import javax.xml.transform.stream.StreamSource;
import org.apache.clerezza.rdf.core.impl.util.Base64;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
import org.slf4j.Logger;
@@ -101,7 +103,12 @@ public class LemmatizerClientHTTP {
long start = System.currentTimeMillis();
InputStream stream = con.getInputStream();
log.debug("Request to {} took {}ms",serviceEP,System.currentTimeMillis()-start);
-
+ if(log.isTraceEnabled()){
+ //log the response if trace is enabled
+ String soapResponse = IOUtils.toString(stream,"UTF-8");
+ log.trace("SoapResponse: \n{}\n",soapResponse);
+ stream = new ByteArrayInputStream(soapResponse.getBytes(Charset.forName("UTF-8")));
+ }
// Create SoapMessage
MessageFactory msgFactory = MessageFactory.newInstance();
SOAPMessage message = msgFactory.createMessage();
@@ -132,12 +139,18 @@ public class LemmatizerClientHTTP {
Element lemmaElm = (Element) lemmasList.item(j);
String lemma = lemmaElm.getTextContent();
NodeList features = ((Element)lemmaElm.getParentNode()).getElementsByTagNameNS("*","LexicalFeature");
- Hashtable<String,String> featuresMap=new Hashtable<String,String>();
+ Hashtable<String,List<String>> featuresMap=new Hashtable<String,List<String>>();
for(int k=0;features!=null && k<features.getLength();k++){
Element feat = (Element) features.item(k);
String name = feat.getAttribute("name");
String value = feat.getTextContent();
- featuresMap.put(name, value);
+ List<String> values=null;
+ if(featuresMap.containsKey(name))
+ values=featuresMap.get(name);
+ else
+ values=new Vector<String>();
+ values.add(value);
+ featuresMap.put(name, values);
}
Reading r=new Reading(lemma, featuresMap);
readings.add(r);
Modified: stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java (original)
+++ stanbol/trunk/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java Mon Nov 26 11:39:25 2012
@@ -17,13 +17,14 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
import java.util.Hashtable;
+import java.util.List;
public class Reading {
String lemma;
- Hashtable<String,String> lexicalFeatures;
+ Hashtable<String,List<String>> lexicalFeatures;
- public Reading(String lemma, Hashtable<String, String> lexicalFeatures) {
+ public Reading(String lemma, Hashtable<String, List<String>> lexicalFeatures) {
super();
this.lemma = lemma;
this.lexicalFeatures = lexicalFeatures;
@@ -37,11 +38,11 @@ public class Reading {
this.lemma = lemma;
}
- public Hashtable<String, String> getLexicalFeatures() {
+ public Hashtable<String, List<String>> getLexicalFeatures() {
return lexicalFeatures;
}
- public void setLexicalFeatures(Hashtable<String, String> lexicalFeatures) {
+ public void setLexicalFeatures(Hashtable<String, List<String>> lexicalFeatures) {
this.lexicalFeatures = lexicalFeatures;
}
Modified: stanbol/trunk/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java (original)
+++ stanbol/trunk/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java Mon Nov 26 11:39:25 2012
@@ -19,7 +19,6 @@ package org.apache.stanbol.enhancer.engi
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.MORPHOLOGICAL_ANALYSIS;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.SERVICE_URL;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.hasLemmaForm;
-import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.hasMorphoFeature;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -52,8 +51,12 @@ import org.apache.clerezza.rdf.core.impl
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.CeliConstants;
+import org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -69,14 +72,15 @@ import org.slf4j.LoggerFactory;
public class CeliLemmatizerEnhancementEngineTest {
- //static CeliLemmatizerEnhancementEngine morphoAnalysisEngine = new CeliLemmatizerEnhancementEngine();
+ static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
private static final Logger log = LoggerFactory.getLogger(CeliLemmatizerEnhancementEngine.class);
private static final String TEXT = "Torino è la principale città del Piemonte.";
+ private static final String TERM = "casa";
- public CeliLemmatizerEnhancementEngine initEngine(boolean completeMorphoAnalysis) throws IOException, ConfigurationException {
+ private CeliLemmatizerEnhancementEngine initEngine(boolean completeMorphoAnalysis) throws IOException, ConfigurationException {
Dictionary<String, Object> properties = new Hashtable<String, Object>();
properties.put(EnhancementEngine.PROPERTY_NAME, "celiLemmatizer");
properties.put(CeliConstants.CELI_TEST_ACCOUNT, "true");
@@ -88,11 +92,11 @@ public class CeliLemmatizerEnhancementEn
return morphoAnalysisEngine;
}
- public static void shutdownEngine(CeliLemmatizerEnhancementEngine morphoAnalysisEngine) {
+ private static void shutdownEngine(CeliLemmatizerEnhancementEngine morphoAnalysisEngine) {
morphoAnalysisEngine.deactivate(null);
}
- public static ContentItem wrapAsContentItem(final String text) throws IOException {
+ private static ContentItem wrapAsContentItem(final String text) throws IOException {
return ciFactory.createContentItem(new StringSource(text));
}
@@ -129,8 +133,7 @@ public class CeliLemmatizerEnhancementEn
validateEnhancement(ci.getMetadata(), (UriRef)lemmaTextAnnotation, expectedValues);
//validate the lemma form TextAnnotation
int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation,"it");
- assertTrue("Only a single LemmaForm property is expected if '"+
- MORPHOLOGICAL_ANALYSIS+"=false'",lemmaForms == 1);
+ assertTrue("Only a single LemmaForm property is expected if '"+ MORPHOLOGICAL_ANALYSIS+"=false'",lemmaForms == 1);
shutdownEngine(morphoAnalysisEngine);
}
@@ -138,7 +141,7 @@ public class CeliLemmatizerEnhancementEn
@Test
public void testCompleteMorphoAnalysis() throws Exception {
- ContentItem ci = wrapAsContentItem(TEXT);
+ ContentItem ci = wrapAsContentItem(TERM);
//add a simple triple to statically define the language of the test
//content
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
@@ -167,10 +170,9 @@ public class CeliLemmatizerEnhancementEn
while (textAnnotationIterator.hasNext()) {
UriRef textAnnotation = (UriRef) textAnnotationIterator.next().getSubject();
// test if selected Text is added
- validateTextAnnotation(ci.getMetadata(), textAnnotation,TEXT,expectedValues);
+ validateTextAnnotation(ci.getMetadata(), textAnnotation,TERM,expectedValues);
textAnnotationCount++;
//perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
- validateLemmaFormProperty(ci.getMetadata(), textAnnotation,"it");
validateMorphoFeatureProperty(ci.getMetadata(),textAnnotation);
}
log.info("{} TextAnnotations found and validated ...",textAnnotationCount);
@@ -196,8 +198,7 @@ public class CeliLemmatizerEnhancementEn
Resource lemmaForms = lemmaFormsIterator.next().getObject();
assertTrue("Lemma Forms value are expected of type PlainLiteral", lemmaForms instanceof PlainLiteral);
assertFalse("Lemma forms MUST NOT be empty",((PlainLiteral)lemmaForms).getLexicalForm().isEmpty());
- assertNotNull("Language of the Lemma Form literal MUST BE the same as for the parsed text",
- ((PlainLiteral)lemmaForms).getLanguage());
+ assertNotNull("Language of the Lemma Form literal MUST BE not null",((PlainLiteral)lemmaForms).getLanguage());
assertEquals("Language of the Lemma Form literal MUST BE the same as for the parsed text",
lang, ((PlainLiteral)lemmaForms).getLanguage().toString());
}
@@ -209,19 +210,55 @@ public class CeliLemmatizerEnhancementEn
* @param textAnnotation the TextAnnotation to check
*/
private void validateMorphoFeatureProperty(TripleCollection enhancements, NonLiteral textAnnotation) {
- Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, hasMorphoFeature, null);
- assertTrue("No Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ //This test checks for known morpho features of a given input (constant TERM)
+ Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
+ assertTrue("No POS Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
while(morphoFeatureIterator.hasNext()){
Resource morphoFeature = morphoFeatureIterator.next().getObject();
- assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof TypedLiteral);
- String feature = ((Literal)morphoFeature).getLexicalForm();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
- assertTrue("{key}={value} encoding expected (value:"+feature+")",feature.indexOf('=')>0);
- String[] keyValue = feature.split("=");
- assertTrue("{key}={value} encoding expected(value:"+feature+")",
- keyValue.length == 2 && (!keyValue[0].isEmpty()) && (!keyValue[1].isEmpty()));
- assertEquals("DataType of the Morpho Feature MUST BE xsd:string (for now)",XSD.string,
- ((TypedLiteral)morphoFeature).getDataType());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ LexicalCategory cat=LexicalCategory.valueOf(key);
+ assertTrue("Part of Speech of "+TERM+" should be "+LexicalCategory.Noun , (cat==LexicalCategory.Noun));
+ }
}
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
+ assertTrue("No Gender Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
+ assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ Gender cat=Gender.valueOf(key);
+ assertTrue("Gender of "+TERM+" should be "+Gender.Feminine , (cat==Gender.Feminine));
+ }
+ }
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
+ assertTrue("No Number Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
+ assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ NumberFeature cat=NumberFeature.valueOf(key);
+ assertTrue("Number of "+TERM+" should be "+Gender.Feminine , (cat==NumberFeature.Singular));
+ }
+ }
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
+ assertTrue("No Number Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Lemma Forms value are expected of type PlainLiteral", morphoFeature instanceof PlainLiteral);
+ assertFalse("Lemma forms MUST NOT be empty",((PlainLiteral)morphoFeature).getLexicalForm().isEmpty());
+ String feature=((PlainLiteral)morphoFeature).getLexicalForm();
+ assertTrue("Lemma of "+TERM+" should be "+TERM , (feature.equals(TERM)));
+ }
+
}
}
Propchange: stanbol/trunk/enhancer/engines/dbpedia-spotlight/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon Nov 26 11:39:25 2012
@@ -0,0 +1,4 @@
+/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/dbpedia-spotlight:1374978-1386535
+/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/dbpedia-spotlight:1386989-1388016
+/incubator/stanbol/trunk/enhancer/engines/dbpedia-spotlight:1339554,1339557-1339558
+/stanbol/branches/stanbol-nlp-processing/enhancer/engines/dbpedia-spotlight:1388017-1413353
Propchange: stanbol/trunk/enhancer/engines/entityhublinking/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.project
+
+.classpath
Propchange: stanbol/trunk/enhancer/engines/entitylinking/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.settings
+
+.classpath
+
+.project
+
+target
Copied: stanbol/trunk/enhancer/engines/entitylinking/pom.xml (from r1413353, stanbol/branches/stanbol-nlp-processing/enhancer/engines/entitylinking/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/pom.xml?p2=stanbol/trunk/enhancer/engines/entitylinking/pom.xml&p1=stanbol/branches/stanbol-nlp-processing/enhancer/engines/entitylinking/pom.xml&r1=1413353&r2=1413560&rev=1413560&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/entitylinking/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/entitylinking/pom.xml Mon Nov 26 11:39:25 2012
@@ -165,7 +165,7 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.data.opennlp.lang.en</artifactId>
- <version>1.0.2-SNAPSHOT</version>
+ <version>1.1.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
Propchange: stanbol/trunk/enhancer/engines/langdetect/
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Mon Nov 26 11:39:25 2012
@@ -0,0 +1,4 @@
+/incubator/stanbol/branches/dbpedia-spotlight-engines/engines/langdetect:1374978-1386535
+/incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect:1386989-1388016
+/incubator/stanbol/trunk/enhancer/engines/langdetect:1339554,1339557-1339558
+/stanbol/branches/stanbol-nlp-processing/enhancer/engines/langdetect:1388017-1413353
Modified: stanbol/trunk/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancer/engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java Mon Nov 26 11:39:25 2012
@@ -87,14 +87,14 @@ public class LanguageDetectionEnhancemen
public static final String MAX_SUGGESTED_PROP = "org.apache.stanbol.enhancer.engines.langdetect.max-suggested";
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * The default value for the Execution of this Engine (
+ * {@link ServiceProperties#ORDERING_NLP_LANGAUGE_DETECTION})<p>
* NOTE: this information is used by the default and weighed {@link Chain}
* implementation to determine the processing order of
* {@link EnhancementEngine}s. Other {@link Chain} implementation do not
* use this information.
*/
- public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
+ public static final Integer defaultOrder = ServiceProperties.ORDERING_NLP_LANGAUGE_DETECTION;
/**
* This contains the only MIME type directly supported by this enhancement engine.
@@ -266,7 +266,7 @@ public class LanguageDetectionEnhancemen
}
public Map<String, Object> getServiceProperties() {
- return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+ return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
}
}
Modified: stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java (original)
+++ stanbol/trunk/enhancer/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java Mon Nov 26 11:39:25 2012
@@ -75,14 +75,14 @@ public class LangIdEnhancementEngine
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * The default value for the Execution of this Engine (
+ * {@link ServiceProperties#ORDERING_NLP_LANGAUGE_DETECTION})<p>
* NOTE: this information is used by the default and weighed {@link Chain}
* implementation to determine the processing order of
* {@link EnhancementEngine}s. Other {@link Chain} implementation do not
* use this information.
*/
- public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
+ public static final Integer defaultOrder = ServiceProperties.ORDERING_NLP_LANGAUGE_DETECTION;
/**
* This contains the only MIME type directly supported by this enhancement engine.
@@ -184,7 +184,7 @@ public class LangIdEnhancementEngine
}
public Map<String, Object> getServiceProperties() {
- return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+ return Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
}
}
Propchange: stanbol/trunk/enhancer/engines/nlp2rdf/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.settings
+
+.project
+
+.classpath
+
+target
Copied: stanbol/trunk/enhancer/engines/nlp2rdf/pom.xml (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/nlp2rdf/pom.xml?p2=stanbol/trunk/enhancer/engines/nlp2rdf/pom.xml&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/pom.xml&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/nlp2rdf/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/nlp2rdf/pom.xml Mon Nov 26 11:39:25 2012
@@ -17,13 +17,13 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.nlp2rdf</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine: NLP data to RDF converter</name>
@@ -38,12 +38,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/nlp2rdf/
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/nlp2rdf/
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/nlp2rdf/
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/nlp2rdf/
</developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
+ <url>http://stanbol.apache.org/</url>
</scm>
<build>
@@ -81,17 +81,17 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.opennlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.felix</groupId>
Propchange: stanbol/trunk/enhancer/engines/opennlp-chunker/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.classpath
+
+.project
Copied: stanbol/trunk/enhancer/engines/opennlp-chunker/pom.xml (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-chunker/pom.xml?p2=stanbol/trunk/enhancer/engines/opennlp-chunker/pom.xml&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/pom.xml&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/opennlp-chunker/pom.xml Mon Nov 26 11:39:25 2012
@@ -16,13 +16,13 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.opennlp.chunker</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine: Chunking / Noun Phrase Detection</name>
@@ -36,12 +36,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/tika/
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/tika/
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/tika/
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/tika/
</developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
+ <url>http://stanbol.apache.org/</url>
</scm>
<properties>
@@ -91,17 +91,17 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.opennlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.felix</groupId>
Copied: stanbol/trunk/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java?p2=stanbol/trunk/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java Mon Nov 26 11:39:25 2012
@@ -5,7 +5,7 @@ import java.util.Map;
import opennlp.tools.chunker.Chunker;
-import org.apache.stanbol.enhancer.nlp.TagSet;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/pom.xml Mon Nov 26 11:39:25 2012
@@ -87,6 +87,11 @@
<version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
+ <version>0.10.0-SNAPSHOT</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
<version>0.9.0-incubating</version>
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineConfig.java Mon Nov 26 11:39:25 2012
@@ -6,12 +6,15 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.stanbol.commons.opennlp.OpenNLP;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
@@ -32,10 +35,11 @@ public class NEREngineConfig {
}
/**
- * Holds the mappings of rdf:type used by concepts to dc:type values used
- * by TextAnnotations.
+ * Holds the configured {@link NerTag}s - the mappings from the
+ * named entity name to the {@link UriRef} type used for the
+ * <code>dc:type</code> value for <code>fise:TextAnnotation</code>s
*/
- private Map<String,UriRef> typeMappings = new HashMap<String,UriRef>(DEFAULT_ENTITY_TYPE_MAPPINGS);
+ private TagSet<NerTag> nerTagSet = new TagSet<NerTag>("NER TagSet");
private Map<String,Collection<String>> additionalNerModels = new HashMap<String,Collection<String>>();
/**
@@ -50,6 +54,12 @@ public class NEREngineConfig {
private String defaultLanguage;
+ public NEREngineConfig(){
+ for(Entry<String,UriRef> mapping : DEFAULT_ENTITY_TYPE_MAPPINGS.entrySet()){
+ nerTagSet.addTag(new NerTag(mapping.getKey(), mapping.getValue()));
+ }
+ }
+
public synchronized void addCustomNameFinderModel(String lang, String modelFileName){
if(lang == null || lang.isEmpty()){
throw new IllegalArgumentException("The parsed lanaguage MUST NOT be NULL or empty!");
@@ -115,17 +125,40 @@ public class NEREngineConfig {
Collection<String> modelNames = additionalNerModels.get(lang);
return modelNames == null ? Collections.EMPTY_LIST : modelNames;
}
-
- public UriRef getMappedType(String namedEntityType){
- return typeMappings.get(namedEntityType);
+ /**
+ * Getter for the {@link NerTag} of the parsed Named Entity
+ * name. If not yet present a new {@link NerTag} (with no
+ * <code>dc:type</code> mapping) is created and added to the
+ * configuration.
+ * @param namedEntityType the NamedEntity name.
+ * @return the NerTag. Guaranteed to be not <code>null</code>
+ * @throws IllegalArgumentException if the parsed NamedEntity
+ * type is <code>null</code> or an empty String.
+ */
+ public NerTag getNerTag(String namedEntityType){
+ if(namedEntityType == null || namedEntityType.isEmpty()){
+ throw new IllegalArgumentException("The parsed NamedEntity string MUST NOT be NULL nor empty!");
+ }
+ NerTag tag = nerTagSet.getTag(namedEntityType);
+ if(tag == null){
+ tag = new NerTag(namedEntityType);
+ nerTagSet.addTag(tag);
+ }
+ return tag;
}
+ /**
+ * Setter for a NamedEntity name > <code>dc:type</code>
+ * mapping.
+ * @param namedEntityType the Named Entity type (as
+ * used by the OpenNLP NameFinder model)
+ * @param dcType the <code>dc:Type</code> used for the
+ * NamedEntity or <code>null</code> if none
+ * @throws IllegalArgumentException if the parsed NamedEntity
+ * type is <code>null</code> or an empty String.
+ */
public void setMappedType(String namedEntityType,UriRef dcType){
if(namedEntityType != null && !namedEntityType.isEmpty()){
- if(dcType == null){
- typeMappings.remove(namedEntityType);
- } else {
- typeMappings.put(namedEntityType, dcType);
- }
+ nerTagSet.addTag(new NerTag(namedEntityType, dcType));
} else {
throw new IllegalArgumentException("The parsed NamedEntity type MUST NOT be NULL nor empty!");
}
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java Mon Nov 26 11:39:25 2012
@@ -16,6 +16,7 @@
*/
package org.apache.stanbol.enhancer.engines.opennlp.impl;
+import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
@@ -41,23 +42,28 @@ import opennlp.tools.namefind.NameFinder
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.Span;
import org.apache.clerezza.rdf.core.Language;
-import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
+import org.apache.stanbol.enhancer.nlp.model.Chunk;
+import org.apache.stanbol.enhancer.nlp.model.Section;
+import org.apache.stanbol.enhancer.nlp.model.Sentence;
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -66,8 +72,6 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
-import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
-import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -91,6 +95,7 @@ public abstract class NEREngineCore
protected NEREngineConfig config;
+
/** Comments about our models */
public static final Map<String, String> DATA_FILE_COMMENTS;
static {
@@ -135,32 +140,45 @@ public abstract class NEREngineCore
+ "method! -> This indicated an Bug in the implementation of the "
+ "EnhancementJobManager!");
}
- Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
- if(contentPart == null){
- throw new IllegalStateException("No ContentPart with Mimetype '"
- + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
- + ": This is also checked in the canEnhance method! -> This "
- + "indicated an Bug in the implementation of the "
- + "EnhancementJobManager!");
- }
- String text;
- try {
- text = ContentItemHelper.getText(contentPart.getValue());
- } catch (IOException e) {
- throw new InvalidContentException(this, ci, e);
- }
- if (text.trim().length() == 0) {
- // TODO: make the length of the data a field of the ContentItem
- // interface to be able to filter out empty items in the canEnhance
- // method
- log.warn("ContentPart {} of ContentItem {} does not contain any text" +
- "to extract knowledge from in ContentItem {}",
- contentPart.getKey(),ci);
- return;
+ final AnalysedText at = AnalysedTextUtils.getAnalysedText(ci);
+ //validate data in the AnalysedText
+ final String text;
+ if(at != null && at.getTokens().hasNext()){ //if the AnalysedText is present and tokens are present
+ if(log.isDebugEnabled()){
+ log.debug("computeEnhancements from AnalysedText ContentPart of ContentItem {}: text={}",
+ ci.getUri().getUnicodeString(), StringUtils.abbreviate(at.getSpan(), 100));
+ }
+ text = null;
+ } else { //no AnalysedText with tokens ...
+ //fallback to processing the plain text is still supported
+ Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
+ if(contentPart == null){
+ throw new IllegalStateException("No ContentPart with Mimetype '"
+ + TEXT_PLAIN_MIMETYPE+"' found for ContentItem "+ci.getUri()
+ + ": This is also checked in the canEnhance method! -> This "
+ + "indicated an Bug in the implementation of the "
+ + "EnhancementJobManager!");
+ }
+ try {
+ text = ContentItemHelper.getText(contentPart.getValue());
+ } catch (IOException e) {
+ throw new InvalidContentException(this, ci, e);
+ }
+ if (text.trim().length() == 0) {
+ // TODO: make the length of the data a field of the ContentItem
+ // interface to be able to filter out empty items in the canEnhance
+ // method
+ log.warn("ContentPart {} of ContentItem {} does not contain any text "
+ + "to extract knowledge from",
+ contentPart.getKey(), ci);
+ return;
+ }
+ if(log.isDebugEnabled()){
+ log.debug("computeEnhancements from ContentPart {} of ContentItem {}: text={}",
+ new Object[]{contentPart.getKey(),ci.getUri().getUnicodeString(),
+ StringUtils.abbreviate(text, 100)});
+ }
}
- log.debug("computeEnhancements from ContentPart {} of ContentItem {}: text={}",
- new Object[]{contentPart.getKey(),ci.getUri().getUnicodeString(),
- StringUtils.abbreviate(text, 100)});
try {
if(config.isProcessedLangage(language)){
for (String defaultModelType : config.getDefaultModelTypes()) {
@@ -168,7 +186,7 @@ public abstract class NEREngineCore
if(nameFinderModel == null){
log.info("No NER Model for {} and language {} available!",defaultModelType,language);
} else {
- findNamedEntities(ci, text, language, nameFinderModel);
+ findNamedEntities(ci, at, text, language, nameFinderModel);
}
}
} //else do not use default models for languages other than the processed one
@@ -178,7 +196,7 @@ public abstract class NEREngineCore
try {
nameFinderModel = openNLP.getModel(TokenNameFinderModel.class,
additionalModel, null);
- findNamedEntities(ci, text, language, nameFinderModel);
+ findNamedEntities(ci, at, text, language, nameFinderModel);
} catch (IOException e) {
log.warn("Unable to load TokenNameFinderModel model for language '"+language
+ "' (model: "+additionalModel+")",e);
@@ -197,6 +215,7 @@ public abstract class NEREngineCore
}
protected void findNamedEntities(final ContentItem ci,
+ final AnalysedText at,
final String text,
final String lang,
final TokenNameFinderModel nameFinderModel) {
@@ -204,8 +223,9 @@ public abstract class NEREngineCore
if (ci == null) {
throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL");
}
- if (text == null) {
- log.warn("NULL was parsed as text for content item " + ci.getUri().getUnicodeString() + "! -> call ignored");
+ if (at == null && text == null) {
+ log.warn("NULL was parsed as AnalysedText AND Text for content item "
+ + ci.getUri() + ". One of the two MUST BE present! -> call ignored");
return;
}
final Language language;
@@ -216,11 +236,17 @@ public abstract class NEREngineCore
}
if(log.isDebugEnabled()){
log.debug("findNamedEntities model={}, language={}, text=",
- new Object[]{ nameFinderModel, language, StringUtils.abbreviate(text, 100) });
+ new Object[]{ nameFinderModel, language,
+ StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
}
LiteralFactory literalFactory = LiteralFactory.getInstance();
MGraph g = ci.getMetadata();
- Map<String,List<NameOccurrence>> entityNames = extractNameOccurrences(nameFinderModel, text);
+ Map<String,List<NameOccurrence>> entityNames;
+ if(at != null){
+ entityNames = extractNameOccurrences(nameFinderModel, at, lang);
+ } else {
+ entityNames = extractNameOccurrences(nameFinderModel, text,lang);
+ }
//lock the ContentItem while writing the RDF data for found Named Entities
ci.getLock().writeLock().lock();
try {
@@ -282,32 +308,74 @@ public abstract class NEREngineCore
}
}
+ @Deprecated
public Collection<String> extractPersonNames(String text) {
- return extractNames(getNameModel("person","en"),text);
+ return extractPersonNames(text, "en");
+ }
+ public Collection<String> extractPersonNames(String text,String lang) {
+ return extractNames(getNameModel("person",lang),text);
}
+ @Deprecated
public Collection<String> extractLocationNames(String text) {
- return extractNames(getNameModel("location","en"), text);
+ return extractLocationNames(text,"en");
}
-
+
+ public Collection<String> extractLocationNames(String text,String lang) {
+ return extractNames(getNameModel("location",lang), text);
+ }
+
+ @Deprecated
public Collection<String> extractOrganizationNames(String text) {
- return extractNames(getNameModel("organization","en"), text);
+ return extractOrganizationNames(text,"en");
}
-
+ public Collection<String> extractOrganizationNames(String text,String lang) {
+ return extractNames(getNameModel("organization",lang), text);
+ }
+ /**
+ * extracts the PersonName occurrences for English language texts
+ * @param text
+ * @return
+ * @deprecated use {@link #extractPersonNameOccurrences(String,String)} instead
+ */
+ @Deprecated
public Map<String,List<NameOccurrence>> extractPersonNameOccurrences(String text) {
- return extractNameOccurrences(getNameModel("person","en"), text);
+ return this.extractPersonNameOccurrences(text, "en");
}
-
+ public Map<String,List<NameOccurrence>> extractPersonNameOccurrences(String text, String lang) {
+ return extractNameOccurrences(getNameModel("person",lang), text, lang);
+ }
+ /**
+ * extracts the LocationName occurrences for English language texts
+ * @param text
+ * @return
+ * @deprecated use {@link #extractLocationNameOccurrences(String,String)} instead
+ */
+ @Deprecated
public Map<String,List<NameOccurrence>> extractLocationNameOccurrences(String text) {
- return extractNameOccurrences(getNameModel("location","en"), text);
+ return extractLocationNameOccurrences(text, "en");
+ }
+
+ public Map<String,List<NameOccurrence>> extractLocationNameOccurrences(String text,String lang) {
+ return extractNameOccurrences(getNameModel("location",lang), text,lang);
}
+ /**
+ * extracts the OrganizationName occurrences for English language texts
+ * @param text
+ * @return
+ * @deprecated use {@link #extractOrganizationNameOccurrences(String,String)} instead
+ */
+ @Deprecated
public Map<String,List<NameOccurrence>> extractOrganizationNameOccurrences(String text) {
- return extractNameOccurrences(getNameModel("organization","en"), text);
+ return extractOrganizationNameOccurrences(text,"en");
+ }
+ public Map<String,List<NameOccurrence>> extractOrganizationNameOccurrences(String text,String lang) {
+ return extractNameOccurrences(getNameModel("organization",lang), text,lang);
}
protected Collection<String> extractNames(TokenNameFinderModel nameFinderModel, String text) {
- return extractNameOccurrences(nameFinderModel, text).keySet();
+ return extractNameOccurrences(nameFinderModel, text, nameFinderModel.getLanguage()).keySet();
}
/**
@@ -339,16 +407,28 @@ public abstract class NEREngineCore
type,language),e);
}
}
+ /**
+ * Loads the {@link SentenceModel} for the parsed language or
+ * English as fallback if one for the language is not available
+ * @param language
+ * @return
+ */
private SentenceModel getSentenceModel(String language) {
try {
SentenceModel model = openNLP.getSentenceModel(language);
if(model != null){
return model;
- } else {
- throw new IllegalStateException(String.format(
- "Unable to built Model for extracting sentences from '%s' " +
- "language texts because the model data could not be loaded.",
- language));
+ } else { //fallback to english
+ log.info("No sentence detection model for {}. Falling back to English.", language);
+ model = openNLP.getSentenceModel("en");
+ if(model == null){
+ throw new IllegalStateException(String.format(
+ "Unable to built Model for extracting sentences neither for '%s' " +
+ "nor the fallback language 'en'.",
+ language));
+ } else {
+ return model;
+ }
}
} catch (InvalidFormatException e) {
throw new IllegalStateException(String.format(
@@ -360,10 +440,82 @@ public abstract class NEREngineCore
language),e);
}
}
-
- protected Map<String,List<NameOccurrence>> extractNameOccurrences(TokenNameFinderModel nameFinderModel,
- String text) {
+ /**
+ * THis method extracts NamedEntity occurrences by using existing {@link Token}s and
+ * {@link Sentence}s in the parsed {@link AnalysedText}.
+ * @param nameFinderModel the model used to find NamedEntities
+ * @param at the Analysed Text
+ * @param language the language of the text
+ * @return the found named Entity Occurrences
+ */
+ protected Map<String,List<NameOccurrence>> extractNameOccurrences(TokenNameFinderModel nameFinderModel,
+ AnalysedText at, String language) {
+ // version with explicit sentence endings to reflect heading / paragraph
+ // structure of an HTML or PDF document converted to text
+ NameFinderME finder = new NameFinderME(nameFinderModel);
+ Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
+ List<Section> sentences = new ArrayList<Section>();
+ //Holds the Sentences of the AnalysedText (or, if none, the whole text as a single Section)
+ AnalysedTextUtils.appandToList(at.getSentences(), sentences);
+ if(sentences.isEmpty()){ //no sentence annotations
+ sentences.add(at); //process the whole text as a single section
+ }
+ for (int i=0;i<sentences.size();i++) {
+ String sentence = sentences.get(i).getSpan();
+
+ // build a context by concatenating three sentences to be used for
+ // similarity ranking / disambiguation + contextual snippet in the
+ // extraction structure
+ List<String> contextElements = new ArrayList<String>();
+ contextElements.add(sentence);
+ //three sentences as context
+ String context = at.getSpan().substring(
+ sentences.get(Math.max(0, i-1)).getStart(),
+ sentences.get(Math.min(sentences.size()-1, i+1)).getEnd());
+
+ // get the tokens, words of the current sentence
+ List<Token> tokens = new ArrayList<Token>(32);
+ List<String> words = new ArrayList<String>(32);
+ for(Iterator<Token> it =sentences.get(i).getTokens();it.hasNext();){
+ Token t = it.next();
+ tokens.add(t);
+ words.add(t.getSpan());
+ }
+ Span[] nameSpans = finder.find(words.toArray(new String[words.size()]));
+ double[] probs = finder.probs();
+ //int lastStartPosition = 0;
+ for (int j = 0; j < nameSpans.length; j++) {
+ String name = at.getSpan().substring(tokens.get(nameSpans[j].getStart()).getStart(),
+ tokens.get(nameSpans[j].getEnd()-1).getEnd());
+ Double confidence = 1.0;
+ for (int k = nameSpans[j].getStart(); k < nameSpans[j].getEnd(); k++) {
+ confidence *= probs[k];
+ }
+ int start = tokens.get(nameSpans[j].getStart()).getStart();
+ int end = start + name.length();
+ NerTag nerTag = config.getNerTag(nameSpans[j].getType());
+ //create the occurrence for writing fise:TextAnnotations
+ NameOccurrence occurrence = new NameOccurrence(name, start, end, nerTag.getType(),
+ context, confidence);
+ List<NameOccurrence> occurrences = nameOccurrences.get(name);
+ if (occurrences == null) {
+ occurrences = new ArrayList<NameOccurrence>();
+ }
+ occurrences.add(occurrence);
+ nameOccurrences.put(name, occurrences);
+ //add also the NerAnnotation to the AnalysedText
+ Chunk chunk = at.addChunk(start, end);
+ //TODO: build AnnotationModel based on the configured Mappings
+ chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, confidence));
+ }
+ }
+ finder.clearAdaptiveData();
+ log.debug("{} name occurrences found: {}", nameOccurrences.size(), nameOccurrences);
+ return nameOccurrences;
+ }
+
+ protected Map<String,List<NameOccurrence>> extractNameOccurrences(TokenNameFinderModel nameFinderModel, String text, String language) {
// version with explicit sentence endings to reflect heading / paragraph
// structure of an HTML or PDF document converted to text
String textWithDots = text.replaceAll("\\n\\n", ".\n");
@@ -374,7 +526,7 @@ public abstract class NEREngineCore
Span[] sentenceSpans = sentenceDetector.sentPosDetect(textWithDots);
NameFinderME finder = new NameFinderME(nameFinderModel);
- Tokenizer tokenizer = SimpleTokenizer.INSTANCE;
+ Tokenizer tokenizer = openNLP.getTokenizer(language);
Map<String,List<NameOccurrence>> nameOccurrences = new LinkedHashMap<String,List<NameOccurrence>>();
for (int i = 0; i < sentenceSpans.length; i++) {
String sentence = sentenceSpans[i].getCoveredText(text).toString().trim();
@@ -411,9 +563,9 @@ public abstract class NEREngineCore
int start = tokenSpans[nameSpans[j].getStart()].getStart();
int absoluteStart = sentenceSpans[i].getStart() + start;
int absoluteEnd = absoluteStart + name.length();
- UriRef mappedType = config.getMappedType(nameSpans[j].getType());
+ NerTag nerTag = config.getNerTag(nameSpans[j].getType());
NameOccurrence occurrence = new NameOccurrence(name, absoluteStart, absoluteEnd,
- mappedType, context, confidence);
+ nerTag.getType(),context, confidence);
List<NameOccurrence> occurrences = nameOccurrences.get(name);
if (occurrences == null) {