You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/14 23:07:21 UTC

svn commit: r1338425 [1/2] - in /incubator/stanbol/branches/celi-enhancement-engines: bundlelist/src/main/bundles/ engines/ engines/celi/ engines/celi/src/ engines/celi/src/main/ engines/celi/src/main/java/ engines/celi/src/main/java/org/ engines/celi/...

Author: rwesten
Date: Mon May 14 21:07:19 2012
New Revision: 1338425

URL: http://svn.apache.org/viewvc?rev=1338425&view=rev
Log:
Initial commit of the latest patch for STANBOL-583, including applying the Stanbol Enhancement Structure validation introduced by STANBOL-612, making the supported languages configurable, and some other minor changes. NOTE: the unit tests for the CELI NER engine fail, as there seem to be some bugs related to XML entity encoding and/or character encoding that result in wrong selected text and wrong start/end positions.

Added:
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/   (with props)
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/pom.xml
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/Concept.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/GuessedLanguage.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LexicalEntry.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NamedEntity.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/log4j.properties
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/test_utils/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/test_utils/MockComponentContext.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/resources/
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/resources/log4j.properties
Modified:
    incubator/stanbol/branches/celi-enhancement-engines/bundlelist/src/main/bundles/list.xml
    incubator/stanbol/branches/celi-enhancement-engines/engines/pom.xml

Modified: incubator/stanbol/branches/celi-enhancement-engines/bundlelist/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/bundlelist/src/main/bundles/list.xml?rev=1338425&r1=1338424&r2=1338425&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/bundlelist/src/main/bundles/list.xml (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/bundlelist/src/main/bundles/list.xml Mon May 14 21:07:19 2012
@@ -177,6 +177,11 @@
       <artifactId>org.apache.stanbol.enhancer.engines.geonames</artifactId>
       <version>0.10.0-incubating-SNAPSHOT</version>
     </bundle>
+    <bundle>  <!-- http://linguagrid.org/ -->
+      <groupId>org.apache.stanbol</groupId>
+	  <artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+    </bundle>
   </startLevel>
   
   <!-- Default Configuration for the Stanbol Enhancer -->

Propchange: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon May 14 21:07:19 2012
@@ -0,0 +1,7 @@
+.classpath
+
+.project
+
+target
+
+.settings

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/pom.xml?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/pom.xml (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/pom.xml Mon May 14 21:07:19 2012
@@ -0,0 +1,146 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	You under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+	<modelVersion>4.0.0</modelVersion>
+
+	<parent>
+		<groupId>org.apache.stanbol</groupId>
+		<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
+		<version>0.10.0-incubating-SNAPSHOT</version>
+		<relativePath>../../parent</relativePath>
+	</parent>
+
+	<groupId>org.apache.stanbol</groupId>
+	<artifactId>org.apache.stanbol.enhancer.engines.celi</artifactId>
+	<packaging>bundle</packaging>
+
+	<name>Apache Stanbol Enhancer Enhancement Engine: CELI  </name>
+	<description></description>
+	<inceptionYear>2012</inceptionYear>
+
+
+	<dependencies>
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
+            <version>0.10.0-incubating-SNAPSHOT</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.stanbol</groupId>
+			<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
+            <version>0.9.0-incubating</version>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.clerezza</groupId>
+			<artifactId>rdf.core</artifactId>
+		</dependency>
+
+		<dependency>
+			<groupId>org.apache.felix</groupId>
+			<artifactId>org.apache.felix.scr.annotations</artifactId>
+			<scope>provided</scope>
+		</dependency>
+
+		<!-- generic tax -->
+        <dependency>
+            <groupId>commons-lang</groupId>
+            <artifactId>commons-lang</artifactId>
+        </dependency> 
+		<dependency>
+			<groupId>org.apache.httpcomponents</groupId>
+			<artifactId>httpclient-osgi</artifactId>
+		</dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+	        <groupId>org.apache.stanbol</groupId>
+	        <artifactId>org.apache.stanbol.commons.stanboltools.offline</artifactId>
+            <version>0.9.0-incubating</version>
+	        <scope>provided</scope>
+    	</dependency> 
+		
+		<!-- test -->
+        <dependency>
+            <groupId>org.apache.stanbol</groupId>
+            <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+            <version>0.10.0-incubating-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+		<dependency>
+            <groupId>org.apache.stanbol</groupId>
+            <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+            <version>0.10.0-incubating-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency> <!-- we use log4j 1.2 -->
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+
+		
+	</dependencies>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<configuration>
+					<skipTests>false</skipTests>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-bundle-plugin</artifactId>
+				<extensions>true</extensions>
+				<configuration>
+					<instructions>
+						<Private-Package>
+							org.apache.stanbol.enhancer.engines.celi.ner.impl.*, 
+							org.apache.stanbol.enhancer.engines.celi.langid.impl.*, 
+							org.apache.stanbol.enhancer.engines.celi.classification.impl.*, 
+							org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.*
+						</Private-Package>
+						<!-- <Embed-Dependency>true</Embed-Dependency> 
+						<Embed-Transitive>true</Embed-Transitive>  -->
+						<Import-Package>
+							org.apache.http,
+							*;resolution:=optional
+						</Import-Package>
+					</instructions>
+				</configuration>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.felix</groupId>
+				<artifactId>maven-scr-plugin</artifactId>
+			</plugin>
+		</plugins>
+	</build>
+
+</project>
\ No newline at end of file

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java Mon May 14 21:07:19 2012
@@ -0,0 +1,252 @@
+package org.apache.stanbol.enhancer.engines.celi.classification.impl;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NoConvertorException;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiClassification") })
+public class CeliClassificationEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
+
+	/**
+	 * This ensures that no connections to external services are made if Stanbol is started in offline mode
+	 * as the OnlineMode service will only be available if OfflineMode is deactivated.
+	 */
+	@Reference
+	private OnlineMode onlineMode;
+
+	/** Language codes of the texts this engine can classify (unmodifiable, shared by all instances). */
+	private static final List<String> supportedLangs = Collections.unmodifiableList(java.util.Arrays.asList("en", "fr", "de", "it", "es", "pt", "pl", "nl"));
+
+	/**
+	 * The literal representing the LangIDEngine as creator.
+	 */
+	public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
+
+	/**
+	 * The default value for the Execution of this Engine. Currently set to
+	 * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
+	 */
+	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION;
+
+	private final Logger log = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+
+	/**
+	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+	 */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+	/** Configuration property holding the CELI license key (optional). */
+	@Property
+	public static final String LICENSE_KEY = "org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine.license";
+
+	/** Configuration property holding the URL of the CELI classification service. */
+	@Property(value = "http://linguagrid.org/LSGrid/ws/dbpedia-classification")
+	public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine.url";
+
+	private String licenseKey;
+	private URL serviceURL;
+
+	private ClassificationClientHTTP client;
+
+	/**
+	 * Reads the license key and the service URL from the component configuration
+	 * and creates the HTTP client used to call the classification service.
+	 * @throws ConfigurationException if no service URL is configured
+	 */
+	@Override
+	@Activate
+	protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
+		super.activate(ctx);
+		Dictionary<String, Object> properties = ctx.getProperties();
+		this.licenseKey = (String) properties.get(LICENSE_KEY);
+		if (licenseKey == null || licenseKey.isEmpty()) {
+			log.warn("no CELI license key configured for this Engine, a guest account will be used (max 100 requests per day). Go on http://linguagrid.org for getting a proper license key.");
+		}
+		String url = (String) properties.get(SERVICE_URL);
+		if (url == null || url.isEmpty()) {
+			throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
+		}
+		this.serviceURL = new URL(url);
+		this.client = new ClassificationClientHTTP(this.serviceURL, this.licenseKey);
+	}
+
+	@Override
+	@Deactivate
+	protected void deactivate(ComponentContext ce) {
+		super.deactivate(ce);
+	}
+
+	/**
+	 * Accepts content items with a plain text part in a supported language. The language
+	 * is kept local: a field would leak it from one content item into another.
+	 * @param ci the content item to check
+	 * @return {@link EnhancementEngine#ENHANCE_ASYNC}, or {@link EnhancementEngine#CANNOT_ENHANCE}
+	 *         (also if no language is annotated)
+	 */
+	@Override
+	public int canEnhance(ContentItem ci) throws EngineException {
+		String language = extractLanguage(ci);
+		if (language == null || !isLangSupported(language)
+				|| ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) == null) {
+			return CANNOT_ENHANCE;
+		}
+		return ENHANCE_ASYNC;
+	}
+
+	/**
+     * Extracts the language of the parsed ContentItem from the metadata
+     * @param ci the content item
+     * @return the language, or <code>null</code> if the metadata contain no usable language
+     */
+    private String extractLanguage(ContentItem ci) {
+        MGraph metadata = ci.getMetadata();
+        Iterator<Triple> langaugeEnhancementCreatorTriples =
+            metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
+        if(langaugeEnhancementCreatorTriples.hasNext()){
+            String lang = EnhancementEngineHelper.getString(metadata,
+                langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
+            if(lang == null){
+                log.info("Unable to extract language for ContentItem {}! The Enhancement of the {} is missing the {} property",
+                		new Object[]{ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm(), DC_LANGUAGE});
+            }
+            return lang;
+        }
+        // no enhancement created by the CELI language identifier: fall back to any dc:language value
+        Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
+        if (it.hasNext()) {
+            Resource res = it.next().getObject();
+            if (res instanceof Literal) {
+                return ((Literal) res).getLexicalForm();
+            }
+            return res.toString();
+        }
+        log.warn("Unable to extract language for ContentItem {}! Is the {} active?",
+        		ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm());
+        return null;
+    }
+
+	/**
+	 * Sends the plain text of the content item to the classification service and adds
+	 * the returned concepts to the metadata of the content item.
+	 * @param ci the content item to enhance
+	 * @throws EngineException if the text of the content item cannot be read
+	 */
+	@Override
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		// resolved for every call and never cached, so a previous content item cannot influence this one
+		String language = extractLanguage(ci);
+		if (language == null) {
+			throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+		String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+		if (text.trim().length() == 0) {
+			log.info("No text contained in ContentPart {"+contentPart.getKey()+"} of ContentItem {"+ci.getUri()+"}");
+			return;
+		}
+
+		try {
+			List<Concept> lista = this.client.extractConcepts(text, language);
+			LiteralFactory literalFactory = LiteralFactory.getInstance();
+			MGraph g = ci.getMetadata();
+			UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+			for (Concept ne : lista) {
+				List<UriRef> uris = this.getEntityRefForType(ne.getClassLabel());
+
+				try {
+					for (UriRef uri : uris)
+						g.add(new TripleImpl(textAnnotation, DC_RELATION, uri));
+					g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(ne.getConfidence())));
+				} catch (NoConvertorException e) {
+					log.error(e.getMessage(),e);
+				}
+			}
+		} catch (Exception e) { // best effort: failures while writing the enhancements are logged only
+			log.error(e.getMessage(),e);
+		}
+
+	}
+
+	/** Checks if the parsed language code is contained in {@link #supportedLangs}. */
+	private boolean isLangSupported(String language) {
+		return supportedLangs.contains(language);
+	}
+
+	/** Creates one DBpedia ontology URI for every space separated label of the parsed string. */
+	private List<UriRef> getEntityRefForType(String classificationLabels) {
+		List<UriRef> refs = new Vector<UriRef>();
+		String[] tmps = classificationLabels.split(" ");
+		for (String dbPediaLabel : tmps) {
+			refs.add(new UriRef(NamespaceEnum.dbpedia_ont + dbPediaLabel));
+		}
+		return refs;
+	}
+
+	/** Returns the ordering of this engine ({@link #defaultOrder}) as its only service property. */
+	@Override
+	public Map<String, Object> getServiceProperties() {
+		return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ClassificationClientHTTP.java Mon May 14 21:07:19 2012
@@ -0,0 +1,133 @@
+package org.apache.stanbol.enhancer.engines.celi.classification.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Vector;
+
+import javax.xml.soap.MessageFactory;
+import javax.xml.soap.SOAPBody;
+import javax.xml.soap.SOAPMessage;
+import javax.xml.soap.SOAPPart;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.clerezza.rdf.core.impl.util.Base64;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+public class ClassificationClientHTTP {
+	
+	private final Logger log = LoggerFactory.getLogger(getClass());
+	
+	/** Maximum number of result elements of the service response that are evaluated. */
+	private static final int maxResultToReturn = 3;
+	
+	private final URL serviceEP;
+	private final String licenseKey;
+	
+	/** Creates a client for the given endpoint; the license key may be <code>null</code>. */
+	public ClassificationClientHTTP(URL serviceUrl, String licenseKey){
+		this.serviceEP=serviceUrl;
+		this.licenseKey=licenseKey;
+	}
+	
+	/**
+	 * Posts the body as UTF-8 encoded XML and returns the response decoded as UTF-8.
+	 * The connection is closed only after the response was read completely.
+	 * @param body the request body or <code>null</code> to send no body
+	 * @throws IOException on any error while sending the request or reading the response
+	 */
+	public String doPostRequest(URL url, String body) throws IOException {
+		HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
+		try {
+			urlConn.setRequestMethod("POST");
+			urlConn.setDoInput(true);
+			urlConn.setDoOutput(body != null);
+			urlConn.setUseCaches(false);
+			urlConn.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
+			if(this.licenseKey!=null){
+				String encoded = Base64.encode(this.licenseKey.getBytes("UTF-8"));
+				urlConn.setRequestProperty("Authorization", "Basic "+encoded);
+			}
+			// send POST output
+			if (body != null) {
+				OutputStreamWriter printout = new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8");
+				try {
+					printout.write(body);
+					printout.flush();
+				} finally {
+					printout.close();
+				}
+			}
+			// get response data; the stream has to be consumed before the connection is closed
+			java.io.InputStream in = urlConn.getInputStream();
+			try {
+				return IOUtils.toString(in, "UTF-8");
+			} finally {
+				in.close();
+			}
+		} finally {
+			urlConn.disconnect();
+		}
+	}
+
+	/**
+	 * Sends the text to the classification service and returns the distinct concepts of the
+	 * first results. Errors are logged, not thrown; they yield a shorter or empty list.
+	 * @param lang the language of the text, sent as the name of the classification model
+	 */
+	public List<Concept> extractConcepts(String text,String lang) {
+		List<Concept> extractedConcepts = new Vector<Concept>();
+
+		try {
+			String txt = StringEscapeUtils.escapeXml(text);
+			String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:clas=\"http://linguagrid.org/v20110204/classification\"><soapenv:Header/><soapenv:Body> <clas:classify>"
+							+"<clas:user>wiki</clas:user><clas:model>"+StringEscapeUtils.escapeXml(lang)+"</clas:model><clas:text>"+txt+"</clas:text></clas:classify></soapenv:Body></soapenv:Envelope>";
+			String responseXml = doPostRequest(this.serviceEP, xmldata);
+			log.debug(responseXml);
+			// Create SoapMessage
+			MessageFactory msgFactory = MessageFactory.newInstance();
+			SOAPMessage message = msgFactory.createMessage();
+			SOAPPart soapPart = message.getSOAPPart();
+			// Load the SOAP text into a stream source
+			ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+			StreamSource source = new StreamSource(stream);
+
+			// Set contents of message
+			soapPart.setContent(source);
+
+			SOAPBody soapBody = message.getSOAPBody();
+			NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
+			HashSet<String> inserted=new HashSet<String>();
+			for (int i = 0; i < nlist.getLength() && i<maxResultToReturn; i++) {
+				try {
+					Element result = (Element) nlist.item(i);
+					String model = result.getElementsByTagNameNS("*","label").item(0).getTextContent();
+					model=model.substring(1, model.length()-1); // drops the first and the last character of the label
+					String conf=result.getElementsByTagNameNS("*","score").item(0).getTextContent();
+					float confidence=Float.parseFloat(conf);
+					for(String t: model.split(" ")){
+						if(inserted.add(t)){ // add(..) returns false for labels that were already reported
+							extractedConcepts.add(new Concept(t, confidence));
+						}
+					}
+				} catch (Exception e) {
+					log.warn("Unable to parse classification result " + i, e);
+				}
+			}
+		} catch (Exception e) {
+			log.error("Unable to classify text via " + this.serviceEP, e);
+		}
+		return extractedConcepts;
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/Concept.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/Concept.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/Concept.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/Concept.java Mon May 14 21:07:19 2012
@@ -0,0 +1,28 @@
+package org.apache.stanbol.enhancer.engines.celi.classification.impl;
+
+/**
+ * A classification result: a class label together with the confidence score
+ * that was reported for it.
+ */
+public class Concept {
+
+	/** Label of the class this concept stands for. */
+	private String classLabel;
+
+	/** Confidence score associated with {@link #classLabel}. */
+	private float confidence;
+
+	/**
+	 * Creates a concept from a label and its confidence.
+	 *
+	 * @param classLabel the class label
+	 * @param confidence the confidence score of the label
+	 */
+	public Concept(String classLabel, float confidence) {
+		this.confidence = confidence;
+		this.classLabel = classLabel;
+	}
+
+	/** @return the confidence score */
+	public float getConfidence() {
+		return this.confidence;
+	}
+
+	/** @param confidence the new confidence score */
+	public void setConfidence(float confidence) {
+		this.confidence = confidence;
+	}
+
+	/** @return the class label */
+	public String getClassLabel() {
+		return this.classLabel;
+	}
+
+	/** @param classLabel the new class label */
+	public void setClassLabel(String classLabel) {
+		this.classLabel = classLabel;
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngine.java Mon May 14 21:07:19 2012
@@ -0,0 +1,160 @@
+package org.apache.stanbol.enhancer.engines.celi.langid.impl;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLangid") })
+public class CeliLanguageIdentifierEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
+	/**
+	 * This ensures that no connections to external services are made if Stanbol is started in offline mode 
+	 * as the OnlineMode service will only be available if OfflineMode is deactivated. 
+	 */
+	@Reference
+    private OnlineMode onlineMode; 
+	
+	/**
+	 * Ordering value of this engine: two less than
+	 * {@link ServiceProperties#ORDERING_PRE_PROCESSING}.
+	 */
+	public static final Integer defaultOrder = ServiceProperties.ORDERING_PRE_PROCESSING -2;
+
+	/** Logger of this engine instance. */
+	private Logger log = LoggerFactory.getLogger(getClass());
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+
+	/**
+	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+	 */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+	/** Name of the configuration property holding the CELI license key (no default value). */
+	@Property
+	public static final String LICENSE_KEY = "org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license";
+
+	/** Name of the configuration property holding the URL of the language identifier web service. */
+	@Property(value = "http://linguagrid.org/LSGrid/ws/language-identifier")
+	public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url";
+
+	/** License key read from the {@link #LICENSE_KEY} property; may be null or empty. */
+	private String licenseKey;
+	/** Service endpoint parsed from the {@link #SERVICE_URL} property. */
+	private URL serviceURL;
+
+	/** HTTP client for the language identifier service. */
+	private LanguageIdentifierClientHTTP client;
+
+	/**
+	 * Activates the engine: reads the license key and the service URL from the
+	 * component configuration and creates the HTTP client used to call the service.
+	 *
+	 * @param ctx the component context providing the configuration properties
+	 * @throws IOException if the configured URL can not be parsed
+	 * @throws ConfigurationException if no service URL is configured
+	 */
+	@Override
+	@Activate
+	public void activate(ComponentContext ctx) throws IOException, ConfigurationException {
+		super.activate(ctx);
+		Dictionary<String, Object> properties = ctx.getProperties();
+		this.licenseKey = (String) properties.get(LICENSE_KEY);
+		if (licenseKey == null || licenseKey.isEmpty()) {
+			log.warn("no CELI license key configured for this Engine, a guest account will be used (max 100 requests per day). Go on http://linguagrid.org for getting a proper license key.");
+		}
+		String url = (String) properties.get(SERVICE_URL);
+		if (url == null || url.isEmpty()) {
+			// the original message was built from "(e.g. by" + "using ..." and rendered as "byusing"
+			throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
+		}
+		this.serviceURL = new URL(url);
+		this.client = new LanguageIdentifierClientHTTP(this.serviceURL, this.licenseKey);
+	}
+	
+	/**
+	 * Deactivates the engine; only delegates to the super class.
+	 *
+	 * @param ce the component context
+	 */
+	@Override
+	@Deactivate
+	protected void deactivate(ComponentContext ce) {
+		super.deactivate(ce);
+	}
+
+	/**
+	 * Tells whether the content item can be processed: it can if it provides a
+	 * blob with one of the {@link #SUPPORTED_MIMTYPES}.
+	 *
+	 * @param ci the content item to check
+	 * @return {@code ENHANCE_ASYNC} if a supported blob is present, {@code CANNOT_ENHANCE} otherwise
+	 */
+	@Override
+	public int canEnhance(ContentItem ci) throws EngineException {
+		boolean hasPlainText = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null;
+		return hasPlainText ? ENHANCE_ASYNC : CANNOT_ENHANCE;
+	}
+	
+	/**
+	 * Sends the plain text of the content item to the language identifier
+	 * service and stores the first returned guess (language and confidence)
+	 * as a text enhancement in the metadata of the content item.
+	 * <p>
+	 * Texts with more than five space-separated tokens are sent to
+	 * {@code guessLanguage}, shorter ones to {@code guessQueryLanguage}.
+	 * Failures while calling the service or writing the metadata are logged
+	 * and do not abort the enhancement process.
+	 *
+	 * @param ci the content item to enhance
+	 * @throws EngineException (as {@link InvalidContentException}) if the text can not be read
+	 * @throws IllegalStateException if the content item has no supported content part
+	 */
+	@Override
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+		String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+		if (text.trim().length() == 0) {
+			log.info("No text contained in ContentPart {"+contentPart.getKey()+"} of ContentItem {"+ci.getUri()+"}");
+			return;
+		}
+
+		try {
+			String[] tmps = text.split(" ");
+			List<GuessedLanguage> lista;
+			if (tmps.length > 5) {
+				lista = this.client.guessLanguage(text);
+			} else {
+				lista = this.client.guessQueryLanguage(text);
+			}
+			// The client returns an empty list when the request fails or the
+			// response holds no guess; get(0) would then throw.
+			if (lista == null || lista.isEmpty()) {
+				log.warn("No language guess available for ContentItem {}: no language annotation is added", ci.getUri());
+				return;
+			}
+			GuessedLanguage gl = lista.get(0);
+
+			LiteralFactory literalFactory = LiteralFactory.getInstance();
+			MGraph g = ci.getMetadata();
+			UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
+			g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(gl.getLang())));
+			g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
+		} catch (Exception e) {
+			log.error(e.getMessage(),e);
+		}
+
+	}
+	
+	
+	/**
+	 * Provides the service properties of this engine: a single, unmodifiable
+	 * entry mapping {@code ENHANCEMENT_ENGINE_ORDERING} to {@link #defaultOrder}.
+	 *
+	 * @return the unmodifiable service properties
+	 */
+	@Override
+	public Map<String, Object> getServiceProperties() {
+		Map<String, Object> ordering = Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
+		return Collections.unmodifiableMap(ordering);
+	}
+
+
+}
\ No newline at end of file

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/GuessedLanguage.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/GuessedLanguage.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/GuessedLanguage.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/GuessedLanguage.java Mon May 14 21:07:19 2012
@@ -0,0 +1,30 @@
+package org.apache.stanbol.enhancer.engines.celi.langid.impl;
+
+/**
+ * A single language guess: the guessed language together with the confidence
+ * reported for that guess.
+ */
+public class GuessedLanguage {
+
+	/** The guessed language. */
+	private String lang;
+
+	/** Confidence reported for {@link #lang}. */
+	private double confidence;
+
+	/**
+	 * Creates a language guess.
+	 *
+	 * @param lang the guessed language
+	 * @param d the confidence of the guess
+	 */
+	public GuessedLanguage(String lang, double d) {
+		this.confidence = d;
+		this.lang = lang;
+	}
+
+	/** @return the confidence of the guess */
+	public double getConfidence() {
+		return this.confidence;
+	}
+
+	/** @param confidence the new confidence of the guess */
+	public void setConfidence(double confidence) {
+		this.confidence = confidence;
+	}
+
+	/** @return the guessed language */
+	public String getLang() {
+		return this.lang;
+	}
+
+	/** @param lang the new guessed language */
+	public void setLang(String lang) {
+		this.lang = lang;
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/LanguageIdentifierClientHTTP.java Mon May 14 21:07:19 2012
@@ -0,0 +1,165 @@
+package org.apache.stanbol.enhancer.engines.celi.langid.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
+import java.net.URI;
+import java.net.URL;
+import java.util.List;
+import java.util.Vector;
+
+import javax.xml.soap.MessageFactory;
+import javax.xml.soap.SOAPBody;
+import javax.xml.soap.SOAPMessage;
+import javax.xml.soap.SOAPPart;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.clerezza.rdf.core.impl.util.Base64;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+public class LanguageIdentifierClientHTTP {
+	
+	/** Endpoint of the language identifier web service. */
+	private URL serviceEP;
+	/** License key sent as HTTP Basic credentials; no Authorization header is sent when null. */
+	private String licenseKey;
+	
+	private final Logger log = LoggerFactory.getLogger(getClass());
+
+	
+	/**
+	 * Creates a client for the given service endpoint.
+	 *
+	 * @param serviceUrl the endpoint of the web service
+	 * @param licenseKey the license key used for authentication, may be null
+	 */
+	public LanguageIdentifierClientHTTP(URL serviceUrl, String licenseKey){
+		this.serviceEP=serviceUrl;
+		this.licenseKey=licenseKey;
+	}
+		
+	
+	/**
+	 * Sends an HTTP POST request with content type
+	 * {@code text/xml; charset=utf-8} and returns the response body decoded
+	 * as UTF-8.
+	 * <p>
+	 * If a license key is set, it is sent Base64 encoded in a
+	 * {@code Basic} Authorization header. The response is read completely
+	 * before the connection is released; the streams and the connection are
+	 * released even if the request fails.
+	 *
+	 * @param url the URL to post to
+	 * @param body the request body, or null to send no body
+	 * @return the response body
+	 * @throws IOException if the connection can not be opened or the request fails
+	 */
+	public String doPostRequest(URL url, String body) throws IOException {
+		HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
+		try {
+			urlConn.setRequestMethod("POST");
+			urlConn.setDoInput(true);
+			urlConn.setDoOutput(body != null);
+			urlConn.setUseCaches(false);
+			urlConn.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
+			if (this.licenseKey != null) {
+				String encoded = Base64.encode(this.licenseKey.getBytes("UTF-8"));
+				urlConn.setRequestProperty("Authorization", "Basic " + encoded);
+			}
+
+			// send POST output
+			if (body != null) {
+				OutputStreamWriter printout = new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8");
+				try {
+					printout.write(body);
+					printout.flush();
+				} finally {
+					printout.close();
+				}
+			}
+
+			// get response data; fully qualified so that no additional import is needed
+			java.io.InputStream response = urlConn.getInputStream();
+			try {
+				return IOUtils.toString(response, "UTF-8");
+			} finally {
+				response.close();
+			}
+		} finally {
+			// release the connection only after the response has been consumed
+			urlConn.disconnect();
+		}
+	}
+
+
+	
+	/**
+	 * Calls the {@code guessQueryLanguage} operation of the configured service
+	 * for the given text.
+	 *
+	 * @param text the text to identify the language of
+	 * @return the guesses in the order returned by the service; empty if the
+	 *         request failed or the response holds no guess
+	 */
+	public List<GuessedLanguage> guessQueryLanguage(String text){
+		return requestGuesses("guessQueryLanguage", text);
+	}
+
+	/**
+	 * Calls the {@code guessLanguage} operation of the configured service for
+	 * the given text.
+	 *
+	 * @param text the text to identify the language of
+	 * @return the guesses in the order returned by the service; empty if the
+	 *         request failed or the response holds no guess
+	 */
+	public List<GuessedLanguage> guessLanguage(String text) {
+		// uses the configured endpoint, exactly as guessQueryLanguage does,
+		// instead of a hard-coded service URL
+		return requestGuesses("guessLanguage", text);
+	}
+
+	/**
+	 * Posts a SOAP request for the given operation and parses the
+	 * {@code return} elements of the response into language guesses.
+	 * Errors are logged and result in the guesses parsed so far.
+	 *
+	 * @param operation local name of the SOAP operation element
+	 * @param text the (unescaped) text to send
+	 * @return the parsed guesses, never null
+	 */
+	private List<GuessedLanguage> requestGuesses(String operation, String text) {
+		List<GuessedLanguage> guesses = new Vector<GuessedLanguage>();
+
+		try {
+			String txt = StringEscapeUtils.escapeXml(text);
+			String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:lan=\"http://research.celi.it/LanguageIdentifierWS\"><soapenv:Header/><soapenv:Body>"
+					+"<lan:"+operation+"><textToGuess>"+txt+"</textToGuess></lan:"+operation+"></soapenv:Body></soapenv:Envelope>";
+
+			String responseXml = doPostRequest(this.serviceEP, xmldata);
+			log.debug(responseXml);
+
+			// Create SoapMessage
+			MessageFactory msgFactory = MessageFactory.newInstance();
+			SOAPMessage message = msgFactory.createMessage();
+			SOAPPart soapPart = message.getSOAPPart();
+
+			// Load the SOAP text into a stream source
+			ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+			StreamSource source = new StreamSource(stream);
+
+			// Set contents of message
+			soapPart.setContent(source);
+
+			SOAPBody soapBody = message.getSOAPBody();
+			NodeList nlist = soapBody.getElementsByTagNameNS("*","return");
+			for (int i = 0; i < nlist.getLength(); i++) {
+				try {
+					Element result = (Element) nlist.item(i);
+					String lang = result.getAttribute("language");
+					double d=Double.parseDouble(result.getAttribute("guessConfidence"));
+
+					guesses.add(new GuessedLanguage(lang, d));
+				} catch (Exception e) {
+					// a single malformed entry does not invalidate the other guesses
+					log.warn("Unable to parse language guess #" + i + " of the " + operation + " response", e);
+				}
+
+			}
+		} catch (Exception e) {
+			log.error("Unable to perform " + operation + " request on " + this.serviceEP, e);
+		}
+
+		return guesses;
+	}
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Mon May 14 21:07:19 2012
@@ -0,0 +1,259 @@
+package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+import java.util.Map.Entry;
+
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLemmatizer") })
+public class CeliLemmatizerEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
+	
+	/**
+	 * This ensures that no connections to external services are made if Stanbol is started in offline mode 
+	 * as the OnlineMode service will only be available if OfflineMode is deactivated. 
+	 */
+	@Reference
+    private OnlineMode onlineMode; 
+	
+	/** Property used to attach a lemma form to a text annotation. */
+	public static final UriRef hasLemmaForm = new UriRef("http://fise.iks-project.eu/ontology/hasLemmaForm");
+	/** Property used to attach a "name=value" morphological feature to a text annotation. */
+	public static final UriRef hasMorphoFeature = new UriRef("http://fise.iks-project.eu/ontology/hasMorphologicalFeature");
+
+	/** Language codes accepted by {@link #isLangSupported(String)}. */
+	private static List<String> supportedLangs = new Vector<String>();
+	static {
+		supportedLangs.add("it");
+		supportedLangs.add("da");
+		supportedLangs.add("de");
+		supportedLangs.add("ru");
+		supportedLangs.add("ro");
+	}
+
+	/**
+	 * The literal representing the LangIDEngine as creator.
+	 */
+	public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
+
+	/**
+	 * The default value for the Execution of this Engine. Currently set to
+	 * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
+	 */
+	public static final Integer defaultOrder = ServiceProperties.ORDERING_CONTENT_EXTRACTION;
+
+	/** Logger of this engine instance. */
+	private Logger log = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * Language of the content item, assigned in {@link #canEnhance(ContentItem)}.
+	 * NOTE(review): this is per-content-item state kept in an instance field;
+	 * it looks unsafe if content items are processed concurrently - confirm.
+	 */
+	private String language = null;
+
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+
+	/**
+	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+	 */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+	/** Name of the configuration property holding the CELI license key (no default value). */
+	@Property
+	public static final String LICENSE_KEY = "org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.license";
+
+	/** Name of the configuration property holding the URL of the morphological analyser web service. */
+	@Property(value = "http://linguagrid.org/LSGrid/ws/morpho-analyser")
+	public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.url";
+
+	/** Name of the boolean configuration property enabling the complete morphological analysis (default false). */
+	@Property(boolValue = false)
+	public static final String MORPHOLOGICAL_ANALYSIS = "org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.morphoAnalysis";
+
+	/** License key read from the {@link #LICENSE_KEY} property; may be null or empty. */
+	private String licenseKey;
+	/** Service endpoint parsed from the {@link #SERVICE_URL} property. */
+	private URL serviceURL;
+	/** Whether one annotation per lexical entry (true) or a single lemmatized text (false) is created. */
+	private boolean completeMorphoAnalysis;
+
+	/** HTTP client for the morphological analyser service. */
+	private LemmatizerClientHTTP client;
+
+	/**
+	 * Activates the engine: reads the license key, the service URL and the
+	 * morphological analysis switch from the component configuration and
+	 * creates the HTTP client used to call the service.
+	 *
+	 * @param ctx the component context providing the configuration properties
+	 * @throws IOException if the configured URL can not be parsed
+	 * @throws ConfigurationException if no service URL is configured
+	 */
+	@Override
+	@Activate
+	protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
+		super.activate(ctx);
+		Dictionary<String, Object> properties = ctx.getProperties();
+		this.licenseKey = (String) properties.get(LICENSE_KEY);
+		if (licenseKey == null || licenseKey.isEmpty()) {
+			log.warn("no CELI license key configured for this Engine, a guest account will be used (max 100 requests per day). Go on http://linguagrid.org for getting a proper license key.");
+		}
+		String url = (String) properties.get(SERVICE_URL);
+		if (url == null || url.isEmpty()) {
+			// the original message was built from "(e.g. by" + "using ..." and rendered as "byusing"
+			throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
+		}
+		this.serviceURL = new URL(url);
+
+		// Accept both Boolean and textual values; a missing value means false.
+		// (A blind cast would silently turn the String "true" into false.)
+		Object morphoAnalysis = properties.get(MORPHOLOGICAL_ANALYSIS);
+		if (morphoAnalysis instanceof Boolean) {
+			this.completeMorphoAnalysis = ((Boolean) morphoAnalysis).booleanValue();
+		} else {
+			this.completeMorphoAnalysis = morphoAnalysis != null && Boolean.parseBoolean(morphoAnalysis.toString());
+		}
+		this.client = new LemmatizerClientHTTP(this.serviceURL, this.licenseKey);
+	}
+
+	/**
+	 * Deactivates the engine; only delegates to the super class.
+	 *
+	 * @param ce the component context
+	 */
+	@Override
+	@Deactivate
+	protected void deactivate(ComponentContext ce) {
+		super.deactivate(ce);
+	}
+
+	/**
+	 * Tells whether the content item can be processed: its language must be
+	 * known and supported, and it must provide a blob with one of the
+	 * {@link #SUPPORTED_MIMTYPES}.
+	 *
+	 * @param ci the content item to check
+	 * @return {@code ENHANCE_ASYNC} if the item can be processed,
+	 *         {@code CANNOT_ENHANCE} otherwise (including when no language is available)
+	 */
+	@Override
+	public int canEnhance(ContentItem ci) throws EngineException {
+		this.language = extractLanguage(ci);
+		if (language == null) {
+			// A content item without language information is simply not
+			// processable by this engine; this is not an illegal state.
+			log.debug("No language available for ContentItem {}: can not enhance", ci.getUri());
+			return CANNOT_ENHANCE;
+		}
+
+		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null && this.isLangSupported(language)) {
+			return ENHANCE_ASYNC;
+		}
+		return CANNOT_ENHANCE;
+	}
+
+	/**
+	 * Extracts the language of the parsed ContentItem from the metadata.
+	 * <p>
+	 * If the metadata contain an enhancement created by
+	 * {@link #LANG_ID_ENGINE_NAME}, the {@code DC_LANGUAGE} value of that
+	 * enhancement is used. Otherwise the object of the first
+	 * {@code DC_LANGUAGE} triple found in the metadata is used.
+	 * 
+	 * @param ci
+	 *            the content item
+	 * @return the language, or <code>null</code> if none could be extracted
+	 */
+	private String extractLanguage(ContentItem ci) {
+		MGraph metadata = ci.getMetadata();
+		Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
+		if (langaugeEnhancementCreatorTriples.hasNext()) {
+			String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
+			if (lang == null) {
+				log.info("Unable to extract language for ContentItem {}! The Enhancement of the {} is missing the {} property ... return null",
+						new Object[] { ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm(), DC_LANGUAGE });
+			}
+			return lang;
+		}
+
+		// no enhancement of the language identification engine: fall back to
+		// any language annotation present in the metadata
+		Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
+		if (it.hasNext()) {
+			Resource res = it.next().getObject();
+			if (res instanceof Literal) {
+				return ((Literal) res).getLexicalForm();
+			}
+			return res.toString();
+		}
+
+		log.warn("Unable to extract language for ContentItem {}! Is the {} active? ... return null",
+				ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm());
+		return null;
+	}
+
+	/**
+	 * Sends the plain text of the content item to the CELI service and adds
+	 * the results to the metadata of the content item.
+	 * <p>
+	 * With the complete morphological analysis enabled, one text annotation
+	 * is created per lexical entry (selected text, start/end offsets, lemma
+	 * forms and "name=value" morphological features). Otherwise a single
+	 * text annotation holding the lemmatized text is created. Failures while
+	 * calling the service or writing the metadata are logged and do not
+	 * abort the enhancement process.
+	 *
+	 * @param ci the content item to enhance
+	 * @throws EngineException (as {@link InvalidContentException}) if the text can not be read
+	 * @throws IllegalStateException if the content item has no language or no supported content part
+	 */
+	@Override
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		// Always determine the language of THIS content item; a value cached
+		// on the engine instance may belong to a different content item.
+		String contentLanguage = extractLanguage(ci);
+		if (contentLanguage == null) {
+			throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+		String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+		if (text.trim().length() == 0) {
+			log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
+			return;
+		}
+
+		try {
+
+			MGraph g = ci.getMetadata();
+			LiteralFactory literalFactory = LiteralFactory.getInstance();
+
+			if (this.completeMorphoAnalysis) {
+				List<LexicalEntry> terms = this.client.performMorfologicalAnalysis(text, contentLanguage);
+				for (LexicalEntry le : terms) {
+					UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+					g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, literalFactory.createTypedLiteral(le.getWordForm())));
+					// offset 0 is a valid start (first word of the text)
+					if (le.from >= 0 && le.to > 0) {
+						// offsets are written as integer literals, not as strings
+						g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(Integer.valueOf(le.from))));
+						g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(Integer.valueOf(le.to))));
+					}
+					for (Reading r : le.termReadings) {
+						g.add(new TripleImpl(textAnnotation, hasLemmaForm, literalFactory.createTypedLiteral(r.getLemma())));
+						for (Entry<String, String> feature : r.lexicalFeatures.entrySet()) {
+							g.add(new TripleImpl(textAnnotation, hasMorphoFeature, literalFactory.createTypedLiteral(feature.getKey() + "=" + feature.getValue())));
+						}
+					}
+				}
+			} else {
+				String lemmatizedContents = this.client.lemmatizeContents(text, contentLanguage);
+
+				UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
+				g.add(new TripleImpl(textEnhancement, hasLemmaForm, literalFactory.createTypedLiteral(lemmatizedContents)));
+			}
+		} catch (Exception e) {
+			log.error("Unable to lemmatize ContentItem " + ci.getUri(), e);
+		}
+
+	}
+
+	/**
+	 * Checks whether the given language is one of the supported languages.
+	 *
+	 * @param language the language code to check
+	 * @return true if the language is contained in {@code supportedLangs}
+	 */
+	private boolean isLangSupported(String language) {
+		return supportedLangs.contains(language);
+	}
+
+	/**
+	 * Provides the service properties of this engine: a single, unmodifiable
+	 * entry mapping {@code ENHANCEMENT_ENGINE_ORDERING} to {@link #defaultOrder}.
+	 *
+	 * @return the unmodifiable service properties
+	 */
+	@Override
+	public Map<String, Object> getServiceProperties() {
+		Map<String, Object> ordering = Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder);
+		return Collections.unmodifiableMap(ordering);
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java Mon May 14 21:07:19 2012
@@ -0,0 +1,187 @@
+package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Vector;
+
+import javax.xml.soap.MessageFactory;
+import javax.xml.soap.SOAPBody;
+import javax.xml.soap.SOAPMessage;
+import javax.xml.soap.SOAPPart;
+import javax.xml.transform.stream.StreamSource;
+
+import org.apache.clerezza.rdf.core.impl.util.Base64;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Element;
+import org.w3c.dom.NodeList;
+
+public class LemmatizerClientHTTP {
+	
+	/** Endpoint of the morphological analyser web service. */
+	private URL serviceEP;
+	/** License key sent as HTTP Basic credentials; no Authorization header is sent when null. */
+	private String licenseKey;
+	private final Logger log = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * Creates a client for the given service endpoint.
+	 *
+	 * @param serviceUrl the endpoint of the web service
+	 * @param licenseKey the license key used for authentication, may be null
+	 */
+	public LemmatizerClientHTTP(URL serviceUrl, String licenseKey){
+		this.serviceEP=serviceUrl;
+		this.licenseKey=licenseKey;
+	}
+	
+	/**
+	 * Sends an HTTP POST request with content type
+	 * {@code text/xml; charset=utf-8} and returns the response body decoded
+	 * as UTF-8.
+	 * <p>
+	 * If a license key is set, it is sent Base64 encoded in a
+	 * {@code Basic} Authorization header. The response is read completely
+	 * before the connection is released; the streams and the connection are
+	 * released even if the request fails.
+	 *
+	 * @param url the URL to post to
+	 * @param body the request body, or null to send no body
+	 * @return the response body
+	 * @throws IOException if the connection can not be opened or the request fails
+	 */
+	public String doPostRequest(URL url, String body) throws IOException {
+
+		HttpURLConnection urlConn = (HttpURLConnection) url.openConnection();
+		try {
+			urlConn.setRequestMethod("POST");
+			urlConn.setDoInput(true);
+			urlConn.setDoOutput(body != null);
+			urlConn.setUseCaches(false);
+			urlConn.setRequestProperty("Content-Type", "text/xml; charset=utf-8");
+			if (this.licenseKey != null) {
+				String encoded = Base64.encode(this.licenseKey.getBytes("UTF-8"));
+				urlConn.setRequestProperty("Authorization", "Basic " + encoded);
+			}
+
+			// send POST output
+			if (body != null) {
+				OutputStreamWriter printout = new OutputStreamWriter(urlConn.getOutputStream(), "UTF-8");
+				try {
+					printout.write(body);
+					printout.flush();
+				} finally {
+					printout.close();
+				}
+			}
+
+			// get response data; fully qualified so that no additional import is needed
+			java.io.InputStream response = urlConn.getInputStream();
+			try {
+				return IOUtils.toString(response, "UTF-8");
+			} finally {
+				response.close();
+			}
+		} finally {
+			// release the connection only after the response has been consumed
+			urlConn.disconnect();
+		}
+	}
+	
+	/**
+	 * Sends the text to the morphological analyser and parses the
+	 * {@code LexicalEntry} elements of the response.
+	 * <p>
+	 * For every entry the word form, the offsets and one {@link Reading} per
+	 * {@code Lemma} element are collected; the features of a reading are the
+	 * {@code LexicalFeature} elements found below the parent of its lemma.
+	 * Errors are logged: an entry that can not be parsed is skipped, a failed
+	 * request results in an empty list.
+	 *
+	 * @param text the (unescaped) text to analyse
+	 * @param lang the language of the text
+	 * @return the parsed lexical entries, never null
+	 */
+	public List<LexicalEntry> performMorfologicalAnalysis(String text,String lang) {
+		List<LexicalEntry> lista=new Vector<LexicalEntry>();
+		try {
+			// both values end up in XML attributes and therefore need escaping
+			String txt = StringEscapeUtils.escapeXml(text);
+			String escapedLang = StringEscapeUtils.escapeXml(lang);
+			String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:mor=\"http://research.celi.it/MorphologicalAnalyzer\"><soapenv:Header/><soapenv:Body>"+
+							"<mor:inputText lang=\""+escapedLang+"\" text=\""+txt+"\"/></soapenv:Body></soapenv:Envelope>";
+
+			String responseXml = doPostRequest(this.serviceEP, xmldata);
+			log.debug(responseXml);
+
+			// Create SoapMessage
+			MessageFactory msgFactory = MessageFactory.newInstance();
+			SOAPMessage message = msgFactory.createMessage();
+			SOAPPart soapPart = message.getSOAPPart();
+
+			// Load the SOAP text into a stream source
+			ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+			StreamSource source = new StreamSource(stream);
+
+			// Set contents of message
+			soapPart.setContent(source);
+
+			SOAPBody soapBody = message.getSOAPBody();
+			NodeList nlist = soapBody.getElementsByTagNameNS("*","LexicalEntry");
+			for (int i = 0; i < nlist.getLength() ; i++) {
+				try {
+					Element result = (Element) nlist.item(i);
+					String wordForm = result.getAttribute("WordForm");
+					int from = Integer.parseInt(result.getAttribute("OffsetFrom"));
+					int to = Integer.parseInt(result.getAttribute("OffsetTo"));
+					LexicalEntry le=new LexicalEntry(wordForm, from, to);
+
+					List<Reading> readings = new Vector<Reading>();
+					NodeList lemmasList = result.getElementsByTagNameNS("*","Lemma");
+					for(int j=0;lemmasList!=null && j<lemmasList.getLength();j++){
+						Element lemmaElm = (Element) lemmasList.item(j);
+						String lemma = lemmaElm.getTextContent();
+						NodeList features = ((Element)lemmaElm.getParentNode()).getElementsByTagNameNS("*","LexicalFeature");
+						Hashtable<String,String> featuresMap=new Hashtable<String,String>();
+						for(int k=0;features!=null && k<features.getLength();k++){
+							Element feat = (Element) features.item(k);
+							featuresMap.put(feat.getAttribute("name"), feat.getTextContent());
+						}
+						readings.add(new Reading(lemma, featuresMap));
+					}
+
+					le.setTermReadings(readings);
+					lista.add(le);
+				} catch (Exception e) {
+					// a single malformed entry does not invalidate the others
+					log.warn("Unable to parse LexicalEntry #" + i + " of the morphological analysis response", e);
+				}
+
+			}
+		} catch (Exception e) {
+			log.error("Unable to perform the morphological analysis request on " + this.serviceEP, e);
+		}
+
+		return lista;
+	}
+
+
+	/**
+	 * Sends the text to the morphological analyser and builds a lemmatized
+	 * version of it from the {@code LexicalEntry} elements of the response.
+	 * <p>
+	 * For every entry the distinct lemmas are appended, separated by single
+	 * spaces; an entry without any lemma contributes its word form instead.
+	 * Errors are logged: an entry that can not be parsed is skipped, a failed
+	 * request results in an empty string.
+	 *
+	 * @param text the (unescaped) text to lemmatize
+	 * @param lang the language of the text
+	 * @return the lemmatized text without leading or trailing white space, never null
+	 */
+	public String lemmatizeContents(String text,String lang) {
+		StringBuilder buff=new StringBuilder();
+		try {
+			// both values end up in XML attributes and therefore need escaping
+			String txt = StringEscapeUtils.escapeXml(text);
+			String escapedLang = StringEscapeUtils.escapeXml(lang);
+			String xmldata = "<soapenv:Envelope xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\" xmlns:mor=\"http://research.celi.it/MorphologicalAnalyzer\"><soapenv:Header/><soapenv:Body>"+
+							"<mor:inputText lang=\""+escapedLang+"\" text=\""+txt+"\"/></soapenv:Body></soapenv:Envelope>";
+
+			String responseXml = doPostRequest(this.serviceEP, xmldata);
+			log.debug(responseXml);
+
+			// Create SoapMessage
+			MessageFactory msgFactory = MessageFactory.newInstance();
+			SOAPMessage message = msgFactory.createMessage();
+			SOAPPart soapPart = message.getSOAPPart();
+
+			// Load the SOAP text into a stream source
+			ByteArrayInputStream stream = new ByteArrayInputStream(responseXml.getBytes("UTF-8"));
+			StreamSource source = new StreamSource(stream);
+
+			// Set contents of message
+			soapPart.setContent(source);
+
+			SOAPBody soapBody = message.getSOAPBody();
+			NodeList nlist = soapBody.getElementsByTagNameNS("*","LexicalEntry");
+			for (int i = 0; i < nlist.getLength() ; i++) {
+				try {
+					Element result = (Element) nlist.item(i);
+					NodeList lemmasList = result.getElementsByTagNameNS("*","Lemma");
+					if(lemmasList!=null && lemmasList.getLength()>0){
+						HashSet<String> lemmas=new HashSet<String>();
+						for(int j=0;j<lemmasList.getLength();j++){
+							lemmas.add(lemmasList.item(j).getTextContent());
+						}
+						for(String lemma: lemmas){
+							buff.append(lemma).append(' ');
+						}
+					} else {
+						// "*" is only a wildcard for getElementsByTagNameNS; for
+						// attributes it is a literal namespace and never matches,
+						// so the plain attribute name has to be used here
+						buff.append(result.getAttribute("WordForm")).append(' ');
+					}
+				} catch (Exception e) {
+					// a single malformed entry does not invalidate the others
+					log.warn("Unable to parse LexicalEntry #" + i + " of the lemmatizer response", e);
+				}
+
+			}
+		} catch (Exception e) {
+			log.error("Unable to perform the lemmatization request on " + this.serviceEP, e);
+		}
+
+		return buff.toString().trim();
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LexicalEntry.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LexicalEntry.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LexicalEntry.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LexicalEntry.java Mon May 14 21:07:19 2012
@@ -0,0 +1,53 @@
+package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
+
+import java.util.List;
+
+/**
+ * Simple mutable value holder for one lexical entry: a word form, a
+ * <code>from</code>/<code>to</code> pair and an optional list of
+ * {@link Reading}s.
+ * <p>
+ * NOTE(review): <code>from</code> and <code>to</code> are presumably the
+ * start/end offsets of the word form within the analysed text - confirm
+ * against the code that creates the instances.
+ */
+public class LexicalEntry {
+
+	/** The word form of this entry. */
+	String wordForm;
+	/** The <code>from</code> position of this entry. */
+	int from;
+	/** The <code>to</code> position of this entry. */
+	int to;
+
+	/** The readings of this entry; <code>null</code> until explicitly set. */
+	List<Reading> termReadings;
+
+	/**
+	 * Creates an entry without any readings.
+	 *
+	 * @param wordForm the word form
+	 * @param from the <code>from</code> position
+	 * @param to the <code>to</code> position
+	 */
+	public LexicalEntry(String wordForm, int from, int to) {
+		this.wordForm = wordForm;
+		this.from = from;
+		this.to = to;
+	}
+
+	/** @return the word form */
+	public String getWordForm() {
+		return this.wordForm;
+	}
+
+	/** @param wordForm the new word form */
+	public void setWordForm(String wordForm) {
+		this.wordForm = wordForm;
+	}
+
+	/** @return the <code>from</code> position */
+	public int getFrom() {
+		return this.from;
+	}
+
+	/** @param from the new <code>from</code> position */
+	public void setFrom(int from) {
+		this.from = from;
+	}
+
+	/** @return the <code>to</code> position */
+	public int getTo() {
+		return this.to;
+	}
+
+	/** @param to the new <code>to</code> position */
+	public void setTo(int to) {
+		this.to = to;
+	}
+
+	/** @return the readings, or <code>null</code> if none have been set */
+	public List<Reading> getTermReadings() {
+		return this.termReadings;
+	}
+
+	/** @param termReadings the readings to store (kept by reference, not copied) */
+	public void setTermReadings(List<Reading> termReadings) {
+		this.termReadings = termReadings;
+	}
+
+}
+

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java Mon May 14 21:07:19 2012
@@ -0,0 +1,33 @@
+package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
+
+import java.util.Hashtable;
+
+/**
+ * Simple mutable value holder pairing a lemma with a table of lexical
+ * features.
+ * <p>
+ * NOTE(review): the keys/values of the feature table are presumably
+ * feature names and feature values - confirm against the code that fills it.
+ */
+public class Reading {
+
+	/** The lemma of this reading. */
+	String lemma;
+	/** The lexical features of this reading. */
+	Hashtable<String,String> lexicalFeatures;
+
+	/**
+	 * Creates a reading.
+	 *
+	 * @param lemma the lemma
+	 * @param lexicalFeatures the lexical features (kept by reference, not copied)
+	 */
+	public Reading(String lemma, Hashtable<String, String> lexicalFeatures) {
+		this.lemma = lemma;
+		this.lexicalFeatures = lexicalFeatures;
+	}
+
+	/** @return the lemma */
+	public String getLemma() {
+		return this.lemma;
+	}
+
+	/** @param lemma the new lemma */
+	public void setLemma(String lemma) {
+		this.lemma = lemma;
+	}
+
+	/** @return the lexical features (the stored instance, not a copy) */
+	public Hashtable<String, String> getLexicalFeatures() {
+		return this.lexicalFeatures;
+	}
+
+	/** @param lexicalFeatures the new lexical features (kept by reference) */
+	public void setLexicalFeatures(Hashtable<String, String> lexicalFeatures) {
+		this.lexicalFeatures = lexicalFeatures;
+	}
+
+}

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java?rev=1338425&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java Mon May 14 21:07:19 2012
@@ -0,0 +1,345 @@
+package org.apache.stanbol.enhancer.engines.celi.ner.impl;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+import java.util.Vector;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NoConvertorException;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Reference;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Enhancement engine that sends the plain text of a {@link ContentItem} to
+ * the CELI named entity recognition service (via {@link NERserviceClientHTTP})
+ * and adds one TextAnnotation to the metadata of the ContentItem for every
+ * named entity returned by the service.
+ * <p>
+ * The engine only processes ContentItems that have a <code>text/plain</code>
+ * content part and whose language (as found in the metadata) is one of the
+ * languages configured via {@link #SUPPORTED_LANGUAGES}.
+ */
+@Component(immediate = true, metatype = true)
+@Service
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiNer") })
+public class CeliNamedEntityExtractionEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
+
+	/**
+	 * This ensures that no connections to external services are made if Stanbol is started in offline mode
+	 * as the OnlineMode service will only be available if OfflineMode is deactivated.
+	 */
+	@SuppressWarnings("unused")
+	@Reference
+	private OnlineMode onlineMode;
+
+	/**
+	 * The literal representing the LangIDEngine as creator.
+	 */
+	public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
+
+	/**
+	 * Mapping of the entity type names used by the service to the
+	 * <code>dc:type</code> values written to the TextAnnotations.
+	 */
+	private static final Map<String, UriRef> entityTypes;
+	static {
+		Map<String, UriRef> types = new HashMap<String, UriRef>();
+		types.put("pers", OntologicalClasses.DBPEDIA_PERSON);
+		types.put("loc", OntologicalClasses.DBPEDIA_PLACE);
+		types.put("org", OntologicalClasses.DBPEDIA_ORGANISATION);
+
+		types.put("time", OntologicalClasses.SKOS_CONCEPT);
+		types.put("prod", OntologicalClasses.SKOS_CONCEPT);
+		types.put("amount", OntologicalClasses.SKOS_CONCEPT);
+		entityTypes = Collections.unmodifiableMap(types);
+	}
+	/**
+	 * The supported languages (configured via the {@link #SUPPORTED_LANGUAGES}
+	 * configuration). <code>null</code> while the engine is not active.
+	 */
+	private Collection<String> supportedLangs;
+
+	/**
+	 * The default value for the Execution of this Engine. Currently set to
+	 * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
+	 */
+	public static final Integer defaultOrder = ORDERING_CONTENT_EXTRACTION;
+
+	private final Logger log = LoggerFactory.getLogger(getClass());
+
+	/**
+	 * This contains the only MIME type directly supported by this enhancement
+	 * engine.
+	 */
+	private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
+
+	/**
+	 * Set containing the only supported mime type {@link #TEXT_PLAIN_MIMETYPE}
+	 */
+	private static final Set<String> SUPPORTED_MIMTYPES = Collections.singleton(TEXT_PLAIN_MIMETYPE);
+
+	/**
+	 * Configuration key of the (optional) CELI license key.
+	 */
+	@Property
+	public static final String LICENSE_KEY = "org.apache.stanbol.enhancer.engines.celi.ner.license";
+
+	/**
+	 * Configuration key of the (required) URL of the CELI web service.
+	 */
+	@Property(value = "http://linguagrid.org/LSGrid/ws/com.celi-france.linguagrid.namedentityrecognition.v0u0.demo")
+	public static final String SERVICE_URL = "org.apache.stanbol.enhancer.engines.celi.ner.url";
+
+	/**
+	 * Configuration key of the (required) supported languages. The value may
+	 * be a single String (multiple languages separated by ';'), an array or
+	 * an {@link Iterable}.
+	 */
+	@Property(value = "fr",cardinality=1000)
+	public static final String SUPPORTED_LANGUAGES = "org.apache.stanbol.enhancer.engines.celi.ner.languages";
+
+	private String licenseKey;
+	private URL serviceURL;
+
+	private NERserviceClientHTTP client;
+
+	/**
+	 * Reads the configuration (license key, service URL, supported
+	 * languages) and creates the client used to call the service.
+	 *
+	 * @param ctx the component context providing the configuration
+	 * @throws IOException if the configured service URL is malformed
+	 * @throws ConfigurationException if the service URL or the supported
+	 *             languages are missing or invalid
+	 */
+	@Override
+	@Activate
+	protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
+		super.activate(ctx);
+		@SuppressWarnings("unchecked")
+		Dictionary<String, Object> properties = ctx.getProperties();
+
+		this.licenseKey = (String) properties.get(LICENSE_KEY);
+		if (licenseKey == null || licenseKey.isEmpty()) {
+			log.warn("no CELI license key configured for this Engine, a guest account will be used (max 100 requests per day). Go on http://linguagrid.org for getting a proper license key.");
+		}
+		String url = (String) properties.get(SERVICE_URL);
+		if (url == null || url.isEmpty()) {
+			throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by " + "using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
+		}
+		this.serviceURL = new URL(url);
+
+		this.client = new NERserviceClientHTTP(this.serviceURL, this.licenseKey);
+
+		//init the supported languages (now configurable)
+		Object languagesConfig = properties.get(SUPPORTED_LANGUAGES);
+		Set<String> languages = parseLanguages(languagesConfig);
+		if (languages.isEmpty()) {
+			throw new ConfigurationException(SUPPORTED_LANGUAGES, String.format(
+				"Missing or invalid configuration of the supported languages (config: '%s')",
+				describeConfigValue(languagesConfig)));
+		}
+		this.supportedLangs = Collections.unmodifiableSet(languages);
+	}
+
+	/**
+	 * Converts the configured value of {@link #SUPPORTED_LANGUAGES} to a set
+	 * of language codes. Supported are a single String (multiple languages
+	 * separated by ';'), an array and an {@link Iterable}. Entries are
+	 * trimmed; <code>null</code> and empty entries are logged and ignored.
+	 *
+	 * @param value the configured value (may be <code>null</code>)
+	 * @return the parsed languages; empty if the value is missing, of an
+	 *         unsupported type or contains no valid entry
+	 */
+	private Set<String> parseLanguages(Object value) {
+		Iterable<?> entries;
+		if (value instanceof String) {
+			//support splitting multiple languages with ';'
+			entries = Arrays.asList(((String) value).split(";"));
+		} else if (value instanceof Object[]) {
+			//multi valued properties with a positive cardinality are stored as arrays
+			entries = Arrays.asList((Object[]) value);
+		} else if (value instanceof Iterable<?>) {
+			entries = (Iterable<?>) value;
+		} else {
+			return Collections.<String>emptySet();
+		}
+		Set<String> languages = new HashSet<String>();
+		for (Object entry : entries) {
+			String language = entry == null ? "" : entry.toString().trim();
+			if (language.isEmpty()) { //empty not allowed
+				log.warn("Language configuration '{}' contained illegal value '{}' -> removed",
+					describeConfigValue(value), entry);
+			} else {
+				languages.add(language);
+			}
+		}
+		return languages;
+	}
+
+	/**
+	 * Returns a representation of a configuration value suitable for log and
+	 * error messages (arrays are expanded to their elements).
+	 *
+	 * @param value the configuration value (may be <code>null</code>)
+	 * @return the value itself, or the String listing of an Object array
+	 */
+	private static Object describeConfigValue(Object value) {
+		return value instanceof Object[] ? Arrays.toString((Object[]) value) : value;
+	}
+
+	/**
+	 * Releases the client and resets the configuration of this engine.
+	 *
+	 * @param ce the component context
+	 */
+	@Override
+	@Deactivate
+	protected void deactivate(ComponentContext ce) {
+		super.deactivate(ce);
+		this.supportedLangs = null;
+		this.client = null;
+		this.serviceURL = null;
+	}
+
+	/**
+	 * Checks if the parsed ContentItem can be processed: its language needs
+	 * to be known and supported and it needs to provide a
+	 * <code>text/plain</code> content part.
+	 *
+	 * @param ci the content item
+	 * @return {@link EnhancementEngine#ENHANCE_ASYNC} if the content item can
+	 *         be enhanced, otherwise {@link EnhancementEngine#CANNOT_ENHANCE}
+	 */
+	@Override
+	public int canEnhance(ContentItem ci) throws EngineException {
+		String language = extractLanguage(ci);
+		if (language == null) {
+			log.info("Unable to extract language annotation for ContentItem {} -> will not enhance",
+				ci.getUri());
+			return CANNOT_ENHANCE;
+		}
+		if (!isLangSupported(language)) {
+			log.debug("Language '{}' of contentItem {} is not supported (supported: {}) -> will not enhance",
+				new Object[]{language,ci.getUri(),supportedLangs});
+			return CANNOT_ENHANCE;
+		}
+		if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null) {
+			return ENHANCE_ASYNC;
+		}
+		log.debug("No Content of type {} found in ConentItem {} -> will not enhance",
+			SUPPORTED_MIMTYPES,ci.getUri());
+		return CANNOT_ENHANCE;
+	}
+
+	/**
+	 * Extracts the language of the parsed ContentItem from the metadata.
+	 * The enhancement created by {@link #LANG_ID_ENGINE_NAME} is preferred;
+	 * if there is none, the first <code>dc:language</code> value found in
+	 * the metadata is used.
+	 *
+	 * @param ci
+	 *            the content item
+	 * @return the language or <code>null</code> if none could be extracted
+	 */
+	private String extractLanguage(ContentItem ci) {
+		MGraph metadata = ci.getMetadata();
+		Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
+		if (langaugeEnhancementCreatorTriples.hasNext()) {
+			String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
+			if (lang == null) {
+				log.info("Unable to extract language for ContentItem {}! The Enhancement of the {} is missing the {} property",
+					new Object[]{ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm(), DC_LANGUAGE});
+			}
+			return lang;
+		}
+		//no enhancement of the language identification engine:
+		//fall back to any dc:language value present in the metadata
+		Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
+		if (it.hasNext()) {
+			Resource res = it.next().getObject();
+			if (res instanceof Literal) {
+				return ((Literal) res).getLexicalForm();
+			} else {
+				return res.toString();
+			}
+		}
+		log.warn("Unable to extract language for ContentItem {}! Is the {} active?",
+			ci.getUri().getUnicodeString(), LANG_ID_ENGINE_NAME.getLexicalForm());
+		return null;
+	}
+
+	/**
+	 * Sends the text of the ContentItem to the service and adds a
+	 * TextAnnotation (selected text, type and - if positions are available -
+	 * start, end and selection context) for every returned named entity.
+	 * Errors while calling the service or writing the annotations are logged
+	 * and do not cause an exception.
+	 *
+	 * @param ci the content item
+	 * @throws EngineException if the text can not be read from the content item
+	 * @throws IllegalStateException if the content item has no
+	 *             <code>text/plain</code> content part or no language
+	 */
+	@Override
+	public void computeEnhancements(ContentItem ci) throws EngineException {
+		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
+		if (contentPart == null) {
+			throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+					+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+		String text;
+		try {
+			text = ContentItemHelper.getText(contentPart.getValue());
+		} catch (IOException e) {
+			throw new InvalidContentException(this, ci, e);
+		}
+		if (text.trim().length() == 0) {
+			log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
+			return;
+		}
+		String language = extractLanguage(ci);
+		if (language == null) {
+			throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
+		}
+		Language lang = new Language(language); //used for the plain literals in TextAnnotations
+		try {
+			List<NamedEntity> entities = this.client.extractEntities(text);
+			LiteralFactory literalFactory = LiteralFactory.getInstance();
+
+			MGraph g = ci.getMetadata();
+
+			for (NamedEntity ne : entities) {
+				try {
+					UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+					//add selected text as PlainLiteral in the language extracted from the text
+					g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
+						new PlainLiteralImpl(ne.getFormKind(),lang)));
+					//a triple must not have a null object: only add the
+					//dc:type if the type of the entity is known, so that an
+					//unknown type does not abort the processing of the
+					//remaining entities
+					Resource entityType = getEntityRefForType(ne.type);
+					if (entityType != null) {
+						g.add(new TripleImpl(textAnnotation, DC_TYPE, entityType));
+					} else {
+						log.debug("No dc:type mapping for named entity type '{}' -> no dc:type added", ne.type);
+					}
+					if (ne.getFrom() != null && ne.getTo() != null) {
+						g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(
+							ne.getFrom().intValue())));
+						g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(
+							ne.getTo().intValue())));
+						g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
+							new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
+					}
+				} catch (NoConvertorException e) {
+					log.error(e.getMessage(), e);
+				}
+			}
+		} catch (Exception e) {
+			//best effort: failures of the remote service must not fail the enhancement process
+			log.error(e.getMessage(), e);
+		}
+
+	}
+	/**
+	 * The maximum size of the prefix/suffix for the selection context
+	 */
+	private static final int SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
+	/**
+	 * Extracts the selection context based on the content, selection and
+	 * the start char offset of the selection. The context consists of the
+	 * selection plus up to {@link #SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE}
+	 * chars before and after it, cut at spaces where possible. Offsets
+	 * outside of the content are clamped to the bounds of the content.
+	 * @param content the text
+	 * @param selection the selected text
+	 * @param current the start char offset of the selection within the content
+	 * @return the selection context
+	 */
+	private String getSelectionContext(String content, String selection,int current){
+		int length = content.length();
+		//clamp the selection to the content so that wrong offsets reported
+		//by the service can not cause an index out of bounds
+		int selectionStart = Math.max(0, Math.min(current, length));
+		int selectionEnd = Math.min(length, selectionStart + selection.length());
+		int beginPos;
+		if(selectionStart <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
+			beginPos = 0;
+		} else {
+			int start = selectionStart-SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+			beginPos = content.indexOf(' ',start);
+			if(beginPos < 0 || beginPos >= selectionStart){ //no words
+				beginPos = start; //begin within a word
+			}
+		}
+		int endPos;
+		if(selectionEnd+SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= length){
+			endPos = length;
+		} else {
+			int start = selectionEnd+SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
+			endPos = content.lastIndexOf(' ', start);
+			if(endPos <= selectionEnd){
+				endPos = start; //end within a word;
+			}
+		}
+		return content.substring(beginPos, endPos);
+	}
+
+	/**
+	 * Checks if the parsed language is one of the configured languages.
+	 *
+	 * @param language the language
+	 * @return <code>true</code> if the language is supported
+	 */
+	private boolean isLangSupported(String language) {
+		return supportedLangs.contains(language);
+	}
+
+	/**
+	 * Looks up the <code>dc:type</code> value for an entity type name.
+	 *
+	 * @param type the name of the entity type
+	 * @return the mapped resource or <code>null</code> if the type is unknown
+	 */
+	private Resource getEntityRefForType(String type) {
+		return entityTypes.get(type);
+	}
+
+	/**
+	 * @return an unmodifiable map holding the ordering of this engine
+	 *         ({@link #defaultOrder})
+	 */
+	@Override
+	public Map<String, Object> getServiceProperties() {
+		return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, (Object) defaultOrder));
+	}
+
+}