You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/15 14:33:06 UTC

svn commit: r1338669 - in /incubator/stanbol/branches/celi-enhancement-engines: engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/ engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl...

Author: rwesten
Date: Tue May 15 12:33:05 2012
New Revision: 1338669

URL: http://svn.apache.org/viewvc?rev=1338669&view=rev
Log:
Implementation of STANBOL-613 within the CELI enhancement engine branch

* add getLanguageAnnotations and getLanguage utility methods to the EnhancementEngineHelper
* updates Engines to use this Utility
* Adds UnitTests to the LanguageId and CELI Language Identification Engines to test that Enhancements created by those engines are correctly processed by the new utility methods

Other changes:

* STANBOL-612: moved validation method for LanguageAnnotations from the CELI Language Identification Engine to the EnhancementStructureHelper
* Use this method for validating enhancements created by the LangId Engine (the one based on Apache Tika)

Added:
    incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java   (with props)
    incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java   (with props)
Modified:
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml
    incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
    incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
    incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
    incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -133,7 +133,7 @@ public class CeliClassificationEnhanceme
 
 	@Override
 	public int canEnhance(ContentItem ci) throws EngineException {
-		this.language = extractLanguage(ci);
+		this.language = EnhancementEngineHelper.getLanguage(ci);
 		if (language == null) {
 			throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
 		}
@@ -145,46 +145,10 @@ public class CeliClassificationEnhanceme
 	}
 
 
-	/**
-     * Extracts the language of the parsed ContentItem from the metadata
-     * @param ci the content item
-     * @return the language
-     */
-    private String extractLanguage(ContentItem ci) {
-        MGraph metadata = ci.getMetadata();
-        Iterator<Triple> langaugeEnhancementCreatorTriples = 
-            metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
-        if(langaugeEnhancementCreatorTriples.hasNext()){
-            String lang = EnhancementEngineHelper.getString(metadata, 
-                langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
-            if(lang != null){
-                return lang;
-            } else {
-                log.info("Unable to extract language for ContentItem "+ci.getUri().getUnicodeString()+"! The Enhancement of the "+LANG_ID_ENGINE_NAME.getLexicalForm()+
-                		" is missing the "+DC_LANGUAGE+" property ... return '{}' as default");
-                return null;
-            }
-        } else {
-        	
-        	Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
-            if (it.hasNext()) {
-                Resource res = it.next().getObject();
-                if (res instanceof Literal) {
-                    return ((Literal) res).getLexicalForm();
-                } else {
-                    return res.toString();
-                }
-            }
-        	
-            log.warn("Unable to extract language for ContentItem "+ci.getUri().getUnicodeString()+"! Is the "+LANG_ID_ENGINE_NAME.getLexicalForm()+" active?  ... return '{}' as default");
-            return null;
-        }
-    }
-
 	@Override
 	public void computeEnhancements(ContentItem ci) throws EngineException {
 		if (this.language == null)
-			this.language = extractLanguage(ci);
+			this.language = EnhancementEngineHelper.getLanguage(ci);
 
 		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
 		if (contentPart == null) {

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -142,7 +142,7 @@ public class CeliLemmatizerEnhancementEn
 
 	@Override
 	public int canEnhance(ContentItem ci) throws EngineException {
-		this.language = extractLanguage(ci);
+		this.language = EnhancementEngineHelper.getLanguage(ci);
 		if (language == null) {
 			throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
 		}
@@ -153,46 +153,10 @@ public class CeliLemmatizerEnhancementEn
 			return CANNOT_ENHANCE;
 	}
 
-	/**
-	 * Extracts the language of the parsed ContentItem from the metadata
-	 * 
-	 * @param ci
-	 *            the content item
-	 * @return the language
-	 */
-	private String extractLanguage(ContentItem ci) {
-		MGraph metadata = ci.getMetadata();
-		Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
-		if (langaugeEnhancementCreatorTriples.hasNext()) {
-			String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
-			if (lang != null) {
-				return lang;
-			} else {
-				log.info("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! The Enhancement of the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " is missing the " + DC_LANGUAGE + " property ... return '{}' as default");
-
-				return null;
-			}
-		} else {
-
-			Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
-			if (it.hasNext()) {
-				Resource res = it.next().getObject();
-				if (res instanceof Literal) {
-					return ((Literal) res).getLexicalForm();
-				} else {
-					return res.toString();
-				}
-			}
-
-			log.warn("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! Is the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " active?  ... return '{}' as default");
-			return null;
-		}
-	}
-
 	@Override
 	public void computeEnhancements(ContentItem ci) throws EngineException {
 		if (this.language == null)
-			this.language = extractLanguage(ci);
+			this.language = EnhancementEngineHelper.getLanguage(ci);
 
 		Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
 		if (contentPart == null) {

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -182,7 +182,7 @@ public class CeliNamedEntityExtractionEn
 
 	@Override
 	public int canEnhance(ContentItem ci) throws EngineException {
-		String language = extractLanguage(ci);
+		String language = EnhancementEngineHelper.getLanguage(ci);
 		if (language == null) {
 		    log.info("Unable to extract language annotation for ContentItem  -> will not enhance",
 		        ci.getUri());
@@ -201,40 +201,6 @@ public class CeliNamedEntityExtractionEn
 			return CANNOT_ENHANCE;
 	}
 
-	/**
-	 * Extracts the language of the parsed ContentItem from the metadata
-	 * 
-	 * @param ci
-	 *            the content item
-	 * @return the language
-	 */
-	private String extractLanguage(ContentItem ci) {
-		MGraph metadata = ci.getMetadata();
-		Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
-		if (langaugeEnhancementCreatorTriples.hasNext()) {
-			String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
-			if (lang != null) {
-				return lang;
-			} else {
-				log.info("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! The Enhancement of the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " is missing the " + DC_LANGUAGE + " property ... return '{}' as default");
-				return null;
-			}
-		} else {
-
-			Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
-			if (it.hasNext()) {
-				Resource res = it.next().getObject();
-				if (res instanceof Literal) {
-					return ((Literal) res).getLexicalForm();
-				} else {
-					return res.toString();
-				}
-			}
-
-			log.warn("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! Is the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " active?  ... return '{}' as default");
-			return null;
-		}
-	}
 
 	@Override
 	public void computeEnhancements(ContentItem ci) throws EngineException {
@@ -253,7 +219,7 @@ public class CeliNamedEntityExtractionEn
 			log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
 			return;
 		}
-        String language = extractLanguage(ci);
+        String language = EnhancementEngineHelper.getLanguage(ci);
         if (language == null) {
             throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
         }

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties Tue May 15 12:33:05 2012
@@ -52,11 +52,11 @@ org.apache.stanbol.enhancer.engines.celi
 org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.name=Apache Stanbol Enhancer Engine: CELI Language Identifier
 org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.description=An Enhancement Engine that sends ContentItems to Language Identifier Web Service and converts the results to the Stanbol Enhancement Structure
 
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license.name=License Key
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license.description=The key needed to access the CELI Language Identifier Web Service
+org.apache.stanbol.enhancer.engines.celi.langid.license.name=License Key
+org.apache.stanbol.enhancer.engines.celi.langid.license.description=The key needed to access the CELI Language Identifier Web Service
 
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url.name=Service URL
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url.description=The URL of the CELI Language Identifier Web Service
+org.apache.stanbol.enhancer.engines.celi.langid.url.name=Service URL
+org.apache.stanbol.enhancer.engines.celi.langid.url.description=The URL of the CELI Language Identifier Web Service
 
 
 #LEMM

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -1,5 +1,6 @@
 package org.apache.stanbol.enhancer.engines.celi.classification.impl;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -14,9 +15,11 @@ import java.util.Iterator;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -65,8 +68,12 @@ public class CeliClassificationEnhanceme
 	public void tesetEngine() throws Exception {
 		ContentItem ci = wrapAsContentItem(TEXT);
 		try {
-			CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
-			
+	        //add a simple triple to statically define the language of the test
+            //content
+            ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
+            //unit test should not depend on each other (if possible)
+            //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
+    			
 			classificationEngine.computeEnhancements(ci);
 			int textAnnoNum = checkAllTextAnnotations(ci.getMetadata(), TEXT);
 	        log.info(textAnnoNum + " TextAnnotations found ...");

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -7,6 +7,7 @@ import static org.apache.stanbol.enhance
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEnhancement;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
@@ -15,6 +16,7 @@ import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.Iterator;
+import java.util.List;
 
 import junit.framework.Assert;
 
@@ -91,10 +93,11 @@ public class CeliLanguageIdentifierEnhan
 	        //we expect the "fr" is detected for the parsed text
 	        assertEquals("The detected language for text '"+TEXT+"' MUST BE 'fr'",
 	            "fr",detectedLnaguage.getLexicalForm());
+	        assertEquals("The value oft the returned language is not the expected one", 
+	            detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
 
 	        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
 	        assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
-	        log.info(entityAnnoNum + " EntityAnnotations found ...");
 		} catch (EngineException e) {
 			if (e.getCause() != null && e.getCause() instanceof UnknownHostException) {
 				log.warn("Celi Service not reachable -> offline? -> deactivate test");
@@ -104,26 +107,6 @@ public class CeliLanguageIdentifierEnhan
 		}
 	}
 
-
-	private PlainLiteral validateLanguageAnnotation(MGraph g, String content,HashMap<UriRef,Resource> expectedValues) {
-		Iterator<Triple> textAnnotationIterator = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
-		// test if a textAnnotation is present
-		assertTrue("The Language Annotation is missing!",textAnnotationIterator.hasNext());
-		NonLiteral annotation = textAnnotationIterator.next().getSubject();
-		assertTrue("TextAnnotations MUST BE URIs", annotation instanceof UriRef);
-		assertFalse("Only a single Language Annotation is expected!", textAnnotationIterator.hasNext());
-		//validate enhancement metadata (this also checks the confidence)
-		validateEnhancement(g, (UriRef)annotation, expectedValues);
-		//validate the dc:language value
-		Iterator<Triple> languageIterator = g.filter(annotation, Properties.DC_LANGUAGE, null);
-        assertTrue("The fise:TextAnnotation for the language MUST HAVE a value for dc:language!",languageIterator.hasNext());
-        Resource languageResource = languageIterator.next().getObject();
-        assertFalse("Only a single dc:langauge value MUST BE present!", languageIterator.hasNext());
-        assertTrue("The dc:langauge value MUST BE a plain literal",languageResource instanceof PlainLiteral);
-        assertTrue("The dc:language value MIST BE at least two chars long",
-            ((PlainLiteral)languageResource).getLexicalForm().length()>=2);
-        return (PlainLiteral)languageResource;
-	}
 // removed: other tests now add a simple triple with <{ciUri},dc:langauge,{lang}>
 /**	public static void addEnanchements(ContentItem ci) throws IOException, ConfigurationException, EngineException {
 		//Add guessed language

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -1,5 +1,6 @@
 package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -14,9 +15,11 @@ import java.util.Iterator;
 import org.apache.clerezza.rdf.core.MGraph;
 import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -64,8 +67,12 @@ public class CeliLemmatizerEnhancementEn
 		ContentItem ci = wrapAsContentItem(TEXT);
 		
 		try {
-			CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
-			
+	        //add a simple triple to statically define the language of the test
+            //content
+            ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
+            //unit test should not depend on each other (if possible)
+            //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
+    			
 			morphoAnalysisEngine.computeEnhancements(ci);
 			int textAnnoNum = checkAllTextAnnotations(ci.getMetadata(), TEXT);
 	        log.info(textAnnoNum + " TextAnnotations found ...");

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -18,7 +18,7 @@ import org.apache.stanbol.enhancer.conte
 import org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine;
 import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
 import org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java Tue May 15 12:33:05 2012
@@ -63,6 +63,7 @@ public class EnhancementRDFUtils {
      * @param entity
      *            the related entity
      * @param nameField the field used to extract the name
+     * @param lang the preferred language to include
      */
     public static UriRef writeEntityAnnotation(EnhancementEngine engine,
                                                LiteralFactory literalFactory,
@@ -70,7 +71,8 @@ public class EnhancementRDFUtils {
                                                UriRef contentItemId,
                                                Collection<NonLiteral> relatedEnhancements,
                                                Representation rep,
-                                               String nameField) {
+                                               String nameField, 
+                                               String lang) {
         // 1. check if the returned Entity does has a label -> if not return null
         // add labels (set only a single label. Use "en" if available!
         Text label = null;
@@ -81,7 +83,7 @@ public class EnhancementRDFUtils {
                 label = actLabel;
             } else {
                 //use startWith to match also en-GB and en-US ...
-                if (actLabel.getLanguage() != null && actLabel.getLanguage().startsWith("en")) {
+                if (actLabel.getLanguage() != null && actLabel.getLanguage().startsWith(lang)) {
                     label = actLabel;
                 }
             }

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Tue May 15 12:33:05 2012
@@ -49,6 +49,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -61,6 +62,7 @@ import org.apache.stanbol.entityhub.serv
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
 import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
 import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
 import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
@@ -126,6 +128,11 @@ public class NamedEntityTaggingEngine 
 
     @Property(intValue=0)
     public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
+    /**
+     * The default language for labels included in the enhancement metadata
+     * (used if no language is available for the parsed content).
+     */
+    private static final String DEFAULT_LANGUAGE = "en";
     
     /**
      * Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup the
@@ -154,6 +161,7 @@ public class NamedEntityTaggingEngine 
     public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
 
 
+
     /**
      * State if text annotations of type {@link OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
      */
@@ -319,8 +327,11 @@ public class NamedEntityTaggingEngine 
         LiteralFactory literalFactory = LiteralFactory.getInstance();
         // Retrieve the existing text annotations (requires read lock)
         Map<NamedEntity,List<UriRef>> textAnnotations = new HashMap<NamedEntity,List<UriRef>>();
+        //the language extracted for the parsed content or NULL if not available
+        String contentLangauge;
         ci.getLock().readLock().lock();
         try {
+            contentLangauge = EnhancementEngineHelper.getLanguage(ci);
             for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it
                     .hasNext();) {
                 UriRef uri = (UriRef) it.next().getSubject();
@@ -346,7 +357,7 @@ public class NamedEntityTaggingEngine 
         for (Entry<NamedEntity,List<UriRef>> entry : textAnnotations.entrySet()) {
             try {
                 List<Entity> entitySuggestions = computeEntityRecommentations(
-                    site, entry.getKey(),entry.getValue());
+                    site, entry.getKey(),entry.getValue(),contentLangauge);
                 if(entitySuggestions != null && !entitySuggestions.isEmpty()){
                     suggestions.put(entry.getKey(), entitySuggestions);
                 }
@@ -366,7 +377,10 @@ public class NamedEntityTaggingEngine 
                 for(Entity suggestion : entitySuggestions.getValue()){
                     log.debug("Add Suggestion {} for {}", suggestion.getId(), entitySuggestions.getKey());
                     EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, graph, ci.getUri(),
-                        annotationsToRelate, suggestion.getRepresentation(), nameField);
+                        annotationsToRelate, suggestion.getRepresentation(), nameField,
+                        //TODO: maybe we want labels in a different language than the
+                        //      language of the content (e.g. Accept-Language header)?!
+                        contentLangauge == null ? DEFAULT_LANGUAGE : contentLangauge);
                     if (dereferenceEntities) {
                         entityData.put(suggestion.getId(), suggestion.getRepresentation());
                     }
@@ -391,13 +405,15 @@ public class NamedEntityTaggingEngine 
      * @param contentItemId the id of the contentItem
      * @param textAnnotation the text annotation to enhance
      * @param subsumedAnnotations other text annotations for the same entity 
+     * @param language the language of the analyzed text or <code>null</code>
+     * if not available.
      * @return the suggested {@link Entity entities}
      * @throws EntityhubException On any Error while looking up Entities via
      * the Entityhub
      */
     protected final List<Entity> computeEntityRecommentations(ReferencedSite site,
             NamedEntity namedEntity,
-            List<UriRef> subsumedAnnotations) throws EntityhubException {
+            List<UriRef> subsumedAnnotations, String language) throws EntityhubException {
         // First get the required properties for the parsed textAnnotation
         // ... and check the values
 
@@ -406,7 +422,16 @@ public class NamedEntityTaggingEngine 
                 entityhub.getQueryFactory().createFieldQuery() : 
                     site.getQueryFactory().createFieldQuery();
         // replace spaces with plus to create an AND search for all words in the name!
-        query.setConstraint(nameField, new TextConstraint(namedEntity.getName()));// name.replace(' ', '+')));
+        Constraint labelConstraint;
+        //TODO: make case sensitivity configurable
+        boolean casesensitive = false;
+        if(language != null){
+            //search labels in the language and without language
+            labelConstraint = new TextConstraint(namedEntity.getName(),casesensitive,language,null);
+        } else {
+            labelConstraint = new TextConstraint(namedEntity.getName(),casesensitive);
+        }
+        query.setConstraint(nameField, labelConstraint);
         if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
             if (personState) {
                 if (personType != null) {
@@ -457,7 +482,7 @@ public class NamedEntityTaggingEngine 
             boolean found = false;
             while(labels.hasNext() && !found){
                 Text label = labels.next();
-                if(label.getLanguage() == null || label.getLanguage().startsWith("en")){
+                if(label.getLanguage() == null || (language != null && label.getLanguage().startsWith(language))){
                     if(label.getText().equalsIgnoreCase(namedEntity.getName())){
                         found = true;
                     }

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Tue May 15 12:33:05 2012
@@ -463,32 +463,36 @@ public class KeywordLinkingEngine 
         }
     }
     /**
-     * Extracts the language of the parsed ContentItem from the metadata
+     * Extracts the language of the parsed ContentItem by using
+     * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and "en" as
+     * default.
      * @param ci the content item
      * @return the language
      */
     private String extractLanguage(ContentItem ci) {
-        MGraph metadata = ci.getMetadata();
-        Iterator<Triple> langaugeEnhancementCreatorTriples = 
-            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
-        if(langaugeEnhancementCreatorTriples.hasNext()){
-            String lang = EnhancementEngineHelper.getString(metadata, 
-                langaugeEnhancementCreatorTriples.next().getSubject(), 
-                Properties.DC_LANGUAGE);
-            if(lang != null){
-                return lang;
-            } else {
-                log.warn("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
-                    new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
-                log.warn(" ... return 'en' as default");
-                return "en";
-            }
+        String lang = EnhancementEngineHelper.getLanguage(ci);
+//        if(lang != null){
+//        MGraph metadata = ci.getMetadata();
+//        Iterator<Triple> langaugeEnhancementCreatorTriples = 
+//            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+//        if(langaugeEnhancementCreatorTriples.hasNext()){
+//            String lang = EnhancementEngineHelper.getString(metadata, 
+//                langaugeEnhancementCreatorTriples.next().getSubject(), 
+//                Properties.DC_LANGUAGE);
+        if(lang != null){
+            return lang;
         } else {
-            log.warn("Unable to extract language for ContentItem %s! Is the %s active?",
-                ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+            log.warn("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
+                new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
             log.warn(" ... return 'en' as default");
             return "en";
         }
+//        } else {
+//            log.warn("Unable to extract language for ContentItem %s! Is the %s active?",
+//                ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+//            log.warn(" ... return 'en' as default");
+//            return "en";
+//        }
     }
 
     

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml Tue May 15 12:33:05 2012
@@ -113,6 +113,18 @@
     </dependency>
 
     <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.stanbol</groupId>
+      <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+      <version>0.10.0-incubating-SNAPSHOT</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -34,6 +34,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Property;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -73,7 +74,11 @@ public class LangIdEnhancementEngine 
 
     /**
      * The default value for the Execution of this Engine. Currently set to
-     * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+     * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+     * NOTE: this information is used by the default and weighted {@link Chain}
+     * implementations to determine the processing order of
+     * {@link EnhancementEngine}s. Other {@link Chain} implementations do not
+     * use this information.
      */
     public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
 

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java?rev=1338669&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java Tue May 15 12:33:05 2012
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.langid;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.tika.language.LanguageIdentifier;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * {@link LangIdEngineTest} is a test class for {@link LangIdEnhancementEngine}.
+ *
+ * @author Joerg Steffen, DFKI
+ * @version $Id: LangIdTest.java 1145590 2011-07-12 13:26:39Z wkasper $
+ */
+public class LangIdEngineTest {
+
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+    
+    private static final String TEST_FILE_NAME = "en.txt";
+    /**
+     * This contains the text used for testing
+     */
+    private static String text;
+    /**
+     * Initializes the Tika {@link LanguageIdentifier} profiles and loads the test text.
+     */
+    @BeforeClass
+    public static void oneTimeSetUp() throws IOException {
+        LanguageIdentifier.initProfiles();
+        InputStream in = LangIdEngineTest.class.getClassLoader().getResourceAsStream(
+            TEST_FILE_NAME);
+        assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+        text = IOUtils.toString(in);
+    }
+
+    /**
+     * Tests the language identification.
+     *
+     * @throws IOException if there is an error when reading the text
+     */
+    @Test
+    public void testLangId() throws IOException {
+        LanguageIdentifier tc = new LanguageIdentifier(text);
+        String language = tc.getLanguage();
+        assertEquals("en", language);
+    }
+
+    @Test
+    public void testEngine() throws EngineException, IOException, ConfigurationException {
+        LangIdEnhancementEngine langIdEngine = new LangIdEnhancementEngine();
+        ComponentContext context =  new MockComponentContext();
+        context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langid");
+        langIdEngine.activate(context);
+        ContentItem ci = ciFactory.createContentItem(new StringSource(text));
+        langIdEngine.computeEnhancements(ci);
+        HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
+        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
+            langIdEngine.getClass().getName()));
+        PlainLiteral detectedLnaguage = validateLanguageAnnotation(ci.getMetadata(), text,expectedValues);
+        //even though this test does not validate the quality of the language
+        //detection but rather the correct integration of the language identifier
+        //as EnhancementEngine, we expect that "en" is detected for the parsed text
+        assertEquals("The detected language for text '"+text+"' MUST BE 'en'",
+            "en",detectedLnaguage.getLexicalForm());
+        assertEquals("The value oft the returned language is not the expected one", 
+            detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
+
+        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
+        assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
+
+    }
+}

Propchange: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java?rev=1338669&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java Tue May 15 12:33:05 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.langid;
+
+import java.util.Dictionary;
+import java.util.Hashtable;
+
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.ServiceReference;
+import org.osgi.service.component.ComponentContext;
+import org.osgi.service.component.ComponentInstance;
+
+public class MockComponentContext implements ComponentContext {
+
+    private final Dictionary properties = new Hashtable();
+    
+    @Override
+    public Dictionary getProperties() {
+        return properties;
+    }
+
+    @Override
+    public Object locateService(String name) {
+        return null;
+    }
+
+    @Override
+    public Object locateService(String name, ServiceReference reference) {
+        return null;
+    }
+
+    @Override
+    public Object[] locateServices(String name) {
+        return null;
+    }
+
+    @Override
+    public BundleContext getBundleContext() {
+        return null;
+    }
+
+    @Override
+    public Bundle getUsingBundle() {
+        return null;
+    }
+
+    @Override
+    public ComponentInstance getComponentInstance() {
+        return null;
+    }
+
+    @Override
+    public void enableComponent(String name) {
+    }
+
+    @Override
+    public void disableComponent(String name) {
+    }
+
+    @Override
+    public ServiceReference getServiceReference() {
+        return null;
+    }
+
+}

Propchange: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java Tue May 15 12:33:05 2012
@@ -16,6 +16,7 @@
  */
 package org.apache.stanbol.enhancer.engines.langid.core;
 
+import static junit.framework.Assert.assertEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 
@@ -35,16 +36,21 @@ import org.junit.Test;
  */
 public class LangIdTest {
 
+    private static final String TEST_FILE_NAME = "en.txt";
     /**
-     * This contains the text categorizer to test.
+     * This contains the text used for testing
      */
-  
+    private static String text;
     /**
      * This initializes the text categorizer.
      */
     @BeforeClass
     public static void oneTimeSetUp() throws IOException {
         LanguageIdentifier.initProfiles();
+        InputStream in = LangIdTest.class.getClassLoader().getResourceAsStream(
+            TEST_FILE_NAME);
+        assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+        text = IOUtils.toString(in);
     }
 
     /**
@@ -54,16 +60,8 @@ public class LangIdTest {
      */
     @Test
     public void testLangId() throws IOException {
-        String testFileName = "en.txt";
-
-        InputStream in = this.getClass().getClassLoader().getResourceAsStream(
-                testFileName);
-        assertNotNull("failed to load resource " + testFileName, in);
-
-        String text = IOUtils.toString(in);
         LanguageIdentifier tc = new LanguageIdentifier(text);
         String language = tc.getLanguage();
         assertEquals("en", language);
     }
-
 }

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java Tue May 15 12:33:05 2012
@@ -257,7 +257,7 @@ public class OpenCalaisEngine 
 
     public int canEnhance(ContentItem ci) throws EngineException {
         if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null){
-            String language = getMetadataLanguage(ci.getMetadata(), null);
+            String language = EnhancementEngineHelper.getLanguage(ci);
             if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
                 log.info("OpenCalais can not process ContentItem {} because "
                     + "language {} is not supported (supported: {})",
@@ -320,7 +320,7 @@ public class OpenCalaisEngine 
     public void createEnhancements(Collection<CalaisEntityOccurrence> occs, ContentItem ci) {
         LiteralFactory literalFactory = LiteralFactory.getInstance();
         final Language language; // used for plain literals representing parts fo the content
-        String langString = getMetadataLanguage(ci.getMetadata(), null);
+        String langString = EnhancementEngineHelper.getLanguage(ci);
         if(langString != null && !langString.isEmpty()){
             language = new Language(langString);
         } else {
@@ -593,25 +593,6 @@ public class OpenCalaisEngine 
                 urlConn.getInputStream(), responseEncoding);
     }
 
-    public String getMetadataLanguage(MGraph model, NonLiteral subj) {
-        Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
-        if (it.hasNext()) {
-            Resource langNode = it.next().getObject();
-            return getLexicalForm(langNode);
-        }
-        return null;
-    }
-
-    public String getLexicalForm(Resource res) {
-        if (res == null) {
-            return null;
-        } else if (res instanceof Literal) {
-            return ((Literal) res).getLexicalForm();
-        } else {
-            return res.toString();
-        }
-    }
-
     /**
      * The activate method.
      *

Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java Tue May 15 12:33:05 2012
@@ -473,31 +473,34 @@ public class NEREngineCore implements En
      */
     public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
     /**
-     * Extracts the language of the parsed ContentItem from the metadata
+     * Extracts the language of the parsed ContentItem by using
+     * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and 
+     * {@link #defaultLang} as default
      * @param ci the content item
      * @return the language
      */
     private String extractLanguage(ContentItem ci) {
-        MGraph metadata = ci.getMetadata();
-        Iterator<Triple> langaugeEnhancementCreatorTriples = 
-            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
-        if(langaugeEnhancementCreatorTriples.hasNext()){
-            String lang = EnhancementEngineHelper.getString(metadata, 
-                langaugeEnhancementCreatorTriples.next().getSubject(), 
-                Properties.DC_LANGUAGE);
-            if(lang != null){
-                return lang;
-            } else {
-                log.info("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
-                    new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
-                log.info(" ... return '{}' as default",defaultLang);
-                return defaultLang;
-            }
+        String lang = EnhancementEngineHelper.getLanguage(ci);
+//        MGraph metadata = ci.getMetadata();
+//        Iterator<Triple> langaugeEnhancementCreatorTriples = 
+//            metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+//        if(langaugeEnhancementCreatorTriples.hasNext()){
+//            String lang = EnhancementEngineHelper.getString(metadata, 
+//                langaugeEnhancementCreatorTriples.next().getSubject(), 
+//                Properties.DC_LANGUAGE);
+        if(lang != null){
+            return lang;
         } else {
-            log.info("Unable to extract language for ContentItem {}! Is the {} active?",
-                ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+            log.info("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
+                new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
             log.info(" ... return '{}' as default",defaultLang);
             return defaultLang;
         }
+//        } else {
+//            log.info("Unable to extract language for ContentItem {}! Is the {} active?",
+//                ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+//            log.info(" ... return '{}' as default",defaultLang);
+//            return defaultLang;
+//        }
     }
 }

Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Tue May 15 12:33:05 2012
@@ -16,9 +16,18 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.UUID;
 
@@ -31,6 +40,7 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
@@ -42,9 +52,11 @@ import org.slf4j.LoggerFactory;
 
 public class EnhancementEngineHelper {
 
-    protected static Random rng = new Random();
+    protected final static Random rng = new Random();
+
+    private final static Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
 
-    private static final Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
+    private final static LiteralFactory lf = LiteralFactory.getInstance();
 
     public static void setSeed(long seed) {
         rng.setSeed(seed);
@@ -398,4 +410,82 @@ public class EnhancementEngineHelper {
         }
         return ServiceProperties.ORDERING_DEFAULT;
     }
+    
+    /**
+     * Collects the subjects of all fise:TextAnnotations of the given graph that
+     * define a value for the dc:language property. The result is ordered by
+     * descending 'fise:confidence'; annotations without a confidence come last.<p>
+     * NOTE that the result will likely contain several annotations for the same
+     * language if multiple language identification engines are used in the
+     * same {@link Chain}.
+     * @param graph the graph holding the enhancements,
+     * typically {@link ContentItem#getMetadata()}
+     * @return the sorted list of language annotations; an empty list if none.
+     * @throws IllegalArgumentException if <code>null</code> is passed as graph
+     */
+    public static List<NonLiteral> getLanguageAnnotations(TripleCollection graph){
+        if(graph == null){
+            throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
+        }
+        // plain triple filtering is used, so no SPARQL QueryEngine is required
+        final Map<NonLiteral,Double> confidenceBySubject = new HashMap<NonLiteral,Double>();
+        List<NonLiteral> result = new ArrayList<NonLiteral>();
+        for(Iterator<Triple> it = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); it.hasNext();){
+            NonLiteral subject = it.next().getSubject();
+            if(getString(graph, subject, DC_LANGUAGE) == null){
+                continue; //no dc:language value -> not a language annotation
+            }
+            //the confidence may be null; such entries are sorted to the end
+            confidenceBySubject.put(subject,
+                get(graph, subject, Properties.ENHANCER_CONFIDENCE, Double.class, lf));
+            result.add(subject);
+        }
+        if(result.size() <= 1){
+            return result; //nothing to sort
+        }
+        Collections.sort(result,new Comparator<NonLiteral>() {
+            @Override
+            public int compare(NonLiteral left, NonLiteral right) {
+                Double leftConf = confidenceBySubject.get(left);
+                Double rightConf = confidenceBySubject.get(right);
+                if(leftConf != null && rightConf != null){
+                    return rightConf.compareTo(leftConf); //higher confidence first
+                }
+                //at least one confidence is missing: missing values rank last
+                if(leftConf == null && rightConf == null){ return 0; }
+                return leftConf == null ? 1 : -1;
+            }
+        });
+        return result;
+    }
+    /**
+     * Getter for the language identified for (extracted-from) the parsed
+     * ContentItem. The returned value is the dc:language value of the language
+     * annotation with the highest 'fise:confidence' or - if no language
+     * annotations are present - that of the {@link ContentItem#getUri()}.<p>
+     * Users that want to obtain all language annotations should use
+     * {@link #getLanguageAnnotations(TripleCollection)} instead.<p>
+     * This method holds the read lock of the {@link ContentItem} while reading.
+     * @param ci the contentItem
+     * @return the identified language of the parsed {@link ContentItem}.
+     * <code>null</code> if not available.
+     * @throws IllegalArgumentException if <code>null</code> is passed as content item
+     * @see #getLanguageAnnotations(TripleCollection)
+     */
+    public static String getLanguage(ContentItem ci){
+        if(ci == null){
+            throw new IllegalArgumentException("The parsed ContentItem MUST NOT be NULL!");
+        }
+        ci.getLock().readLock().lock();
+        try {
+            //resolve the metadata only once; all reads on it happen under the read lock
+            TripleCollection metadata = ci.getMetadata();
+            List<NonLiteral> langAnnotations = getLanguageAnnotations(metadata);
+            //sorted by confidence: use the first one or fall back to the content item itself
+            NonLiteral subject = langAnnotations.isEmpty() ? ci.getUri() : langAnnotations.get(0);
+            return getString(metadata, subject, DC_LANGUAGE);
+        } finally {
+            ci.getLock().readLock().unlock();
+        }
+    }
 }

Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java Tue May 15 12:33:05 2012
@@ -18,11 +18,15 @@ import static org.junit.Assert.assertTru
 
 import java.util.Collections;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.PlainLiteral;
 import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
@@ -32,6 +36,7 @@ import org.apache.clerezza.rdf.core.UriR
 import org.apache.clerezza.rdf.ontologies.DCTERMS;
 import org.apache.clerezza.rdf.ontologies.XSD;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 
@@ -379,5 +384,30 @@ public class EnhancementStructureHelper 
             assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
         }
     }
-    
+    /** Validates the single language fise:TextAnnotation expected in the graph; returns its dc:language value. */
+    public static PlainLiteral validateLanguageAnnotation(MGraph g, String content,HashMap<UriRef,Resource> expectedValues) {
+        //exactly one fise:TextAnnotation - with an URI as subject - is expected
+        Iterator<Triple> annotations = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+        assertTrue("The Language Annotation is missing!",annotations.hasNext());
+        NonLiteral langAnnotation = annotations.next().getSubject();
+        assertTrue("TextAnnotations MUST BE URIs", langAnnotation instanceof UriRef);
+        assertFalse("Only a single Language Annotation is expected!", annotations.hasNext());
+        //check the enhancement metadata (noted by the author to cover the confidence too)
+        validateEnhancement(g, (UriRef)langAnnotation, expectedValues);
+        //exactly one dc:language value - a plain literal with >= 2 chars - is expected
+        Iterator<Triple> languages = g.filter(langAnnotation, Properties.DC_LANGUAGE, null);
+        assertTrue("The fise:TextAnnotation for the language MUST HAVE a value for dc:language!",languages.hasNext());
+        Resource value = languages.next().getObject();
+        assertFalse("Only a single dc:langauge value MUST BE present!", languages.hasNext());
+        assertTrue("The dc:langauge value MUST BE a plain literal",value instanceof PlainLiteral);
+        PlainLiteral language = (PlainLiteral)value;
+        assertTrue("The dc:language value MIST BE at least two chars long", language.getLexicalForm().length()>=2);
+        //the EnhancementEngineHelper has to return the annotation validated above
+        List<NonLiteral> extracted = EnhancementEngineHelper.getLanguageAnnotations(g);
+        assertFalse("No langauge Annotation was extracted by the EnhancementEngineHelper#getLanguageAnnotations(..) method",
+            extracted.isEmpty());
+        assertEquals("The returned language annotation was not the one created by this engine",
+            langAnnotation, extracted.get(0));
+        return language;
+    }
 }