You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/15 14:33:06 UTC
svn commit: r1338669 - in
/incubator/stanbol/branches/celi-enhancement-engines:
engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/
engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl...
Author: rwesten
Date: Tue May 15 12:33:05 2012
New Revision: 1338669
URL: http://svn.apache.org/viewvc?rev=1338669&view=rev
Log:
Implementation of STANBOL-613 within the CELI enhancement engine branch
* add getLanguageAnnotations and getLanguage utility methods to the EnhancementEngineHelper
* updates Engines to use this Utility
* Adds UnitTests to the LanguageId and CELI Language Identification Engines to test that Enhancements created by those engines are correctly processed by the new utility methods
Other changes:
* STANBOL-612: moved validation method for LanguageAnnotations from the CELI Language Identification Engine to the EnhancementStructureHelper
* Use this method for validating enhancements created by the LangId Engine (the one based on Apache Tika)
Added:
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java (with props)
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java (with props)
Modified:
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -133,7 +133,7 @@ public class CeliClassificationEnhanceme
@Override
public int canEnhance(ContentItem ci) throws EngineException {
- this.language = extractLanguage(ci);
+ this.language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
@@ -145,46 +145,10 @@ public class CeliClassificationEnhanceme
}
- /**
- * Extracts the language of the parsed ContentItem from the metadata
- * @param ci the content item
- * @return the language
- */
- private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples =
- metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
- if(langaugeEnhancementCreatorTriples.hasNext()){
- String lang = EnhancementEngineHelper.getString(metadata,
- langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
- if(lang != null){
- return lang;
- } else {
- log.info("Unable to extract language for ContentItem "+ci.getUri().getUnicodeString()+"! The Enhancement of the "+LANG_ID_ENGINE_NAME.getLexicalForm()+
- " is missing the "+DC_LANGUAGE+" property ... return '{}' as default");
- return null;
- }
- } else {
-
- Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource res = it.next().getObject();
- if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
- log.warn("Unable to extract language for ContentItem "+ci.getUri().getUnicodeString()+"! Is the "+LANG_ID_ENGINE_NAME.getLexicalForm()+" active? ... return '{}' as default");
- return null;
- }
- }
-
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
if (this.language == null)
- this.language = extractLanguage(ci);
+ this.language = EnhancementEngineHelper.getLanguage(ci);
Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -142,7 +142,7 @@ public class CeliLemmatizerEnhancementEn
@Override
public int canEnhance(ContentItem ci) throws EngineException {
- this.language = extractLanguage(ci);
+ this.language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
@@ -153,46 +153,10 @@ public class CeliLemmatizerEnhancementEn
return CANNOT_ENHANCE;
}
- /**
- * Extracts the language of the parsed ContentItem from the metadata
- *
- * @param ci
- * the content item
- * @return the language
- */
- private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
- if (langaugeEnhancementCreatorTriples.hasNext()) {
- String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
- if (lang != null) {
- return lang;
- } else {
- log.info("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! The Enhancement of the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " is missing the " + DC_LANGUAGE + " property ... return '{}' as default");
-
- return null;
- }
- } else {
-
- Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource res = it.next().getObject();
- if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
- log.warn("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! Is the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " active? ... return '{}' as default");
- return null;
- }
- }
-
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
if (this.language == null)
- this.language = extractLanguage(ci);
+ this.language = EnhancementEngineHelper.getLanguage(ci);
Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -182,7 +182,7 @@ public class CeliNamedEntityExtractionEn
@Override
public int canEnhance(ContentItem ci) throws EngineException {
- String language = extractLanguage(ci);
+ String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
log.info("Unable to extract language annotation for ContentItem -> will not enhance",
ci.getUri());
@@ -201,40 +201,6 @@ public class CeliNamedEntityExtractionEn
return CANNOT_ENHANCE;
}
- /**
- * Extracts the language of the parsed ContentItem from the metadata
- *
- * @param ci
- * the content item
- * @return the language
- */
- private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples = metadata.filter(null, DC_CREATOR, LANG_ID_ENGINE_NAME);
- if (langaugeEnhancementCreatorTriples.hasNext()) {
- String lang = EnhancementEngineHelper.getString(metadata, langaugeEnhancementCreatorTriples.next().getSubject(), DC_LANGUAGE);
- if (lang != null) {
- return lang;
- } else {
- log.info("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! The Enhancement of the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " is missing the " + DC_LANGUAGE + " property ... return '{}' as default");
- return null;
- }
- } else {
-
- Iterator<Triple> it = metadata.filter(null, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource res = it.next().getObject();
- if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
- log.warn("Unable to extract language for ContentItem " + ci.getUri().getUnicodeString() + "! Is the " + LANG_ID_ENGINE_NAME.getLexicalForm() + " active? ... return '{}' as default");
- return null;
- }
- }
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
@@ -253,7 +219,7 @@ public class CeliNamedEntityExtractionEn
log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
return;
}
- String language = extractLanguage(ci);
+ String language = EnhancementEngineHelper.getLanguage(ci);
if (language == null) {
throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/resources/OSGI-INF/metatype/metatype.properties Tue May 15 12:33:05 2012
@@ -52,11 +52,11 @@ org.apache.stanbol.enhancer.engines.celi
org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.name=Apache Stanbol Enhancer Engine: CELI Language Identifier
org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.description=An Enhancement Engine that sends ContentItems to Language Identifier Web Service and converts the results to the Stanbol Enhancement Structure
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license.name=License Key
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.license.description=The key needed to access the CELI Language Identifier Web Service
+org.apache.stanbol.enhancer.engines.celi.langid.license.name=License Key
+org.apache.stanbol.enhancer.engines.celi.langid.license.description=The key needed to access the CELI Language Identifier Web Service
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url.name=Service URL
-org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine.url.description=The URL of the CELI Language Identifier Web Service
+org.apache.stanbol.enhancer.engines.celi.langid.url.name=Service URL
+org.apache.stanbol.enhancer.engines.celi.langid.url.description=The URL of the CELI Language Identifier Web Service
#LEMM
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/classification/impl/CeliClassificationEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -1,5 +1,6 @@
package org.apache.stanbol.enhancer.engines.celi.classification.impl;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -14,9 +15,11 @@ import java.util.Iterator;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -65,8 +68,12 @@ public class CeliClassificationEnhanceme
public void tesetEngine() throws Exception {
ContentItem ci = wrapAsContentItem(TEXT);
try {
- CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
-
+ //add a simple triple to statically define the language of the test
+ //content
+ ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
+ //unit test should not depend on each other (if possible)
+ //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
+
classificationEngine.computeEnhancements(ci);
int textAnnoNum = checkAllTextAnnotations(ci.getMetadata(), TEXT);
log.info(textAnnoNum + " TextAnnotations found ...");
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/langid/impl/CeliLanguageIdentifierEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -7,6 +7,7 @@ import static org.apache.stanbol.enhance
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEnhancement;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
@@ -15,6 +16,7 @@ import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
+import java.util.List;
import junit.framework.Assert;
@@ -91,10 +93,11 @@ public class CeliLanguageIdentifierEnhan
//we expect the "fr" is detected for the parsed text
assertEquals("The detected language for text '"+TEXT+"' MUST BE 'fr'",
"fr",detectedLnaguage.getLexicalForm());
+ assertEquals("The value oft the returned language is not the expected one",
+ detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
- log.info(entityAnnoNum + " EntityAnnotations found ...");
} catch (EngineException e) {
if (e.getCause() != null && e.getCause() instanceof UnknownHostException) {
log.warn("Celi Service not reachable -> offline? -> deactivate test");
@@ -104,26 +107,6 @@ public class CeliLanguageIdentifierEnhan
}
}
-
- private PlainLiteral validateLanguageAnnotation(MGraph g, String content,HashMap<UriRef,Resource> expectedValues) {
- Iterator<Triple> textAnnotationIterator = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
- // test if a textAnnotation is present
- assertTrue("The Language Annotation is missing!",textAnnotationIterator.hasNext());
- NonLiteral annotation = textAnnotationIterator.next().getSubject();
- assertTrue("TextAnnotations MUST BE URIs", annotation instanceof UriRef);
- assertFalse("Only a single Language Annotation is expected!", textAnnotationIterator.hasNext());
- //validate enhancement metadata (this also checks the confidence)
- validateEnhancement(g, (UriRef)annotation, expectedValues);
- //validate the dc:language value
- Iterator<Triple> languageIterator = g.filter(annotation, Properties.DC_LANGUAGE, null);
- assertTrue("The fise:TextAnnotation for the language MUST HAVE a value for dc:language!",languageIterator.hasNext());
- Resource languageResource = languageIterator.next().getObject();
- assertFalse("Only a single dc:langauge value MUST BE present!", languageIterator.hasNext());
- assertTrue("The dc:langauge value MUST BE a plain literal",languageResource instanceof PlainLiteral);
- assertTrue("The dc:language value MIST BE at least two chars long",
- ((PlainLiteral)languageResource).getLexicalForm().length()>=2);
- return (PlainLiteral)languageResource;
- }
// removed: other tests now add a simple triple with <{ciUri},dc:langauge,{lang}>
/** public static void addEnanchements(ContentItem ci) throws IOException, ConfigurationException, EngineException {
//Add guessed language
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -1,5 +1,6 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -14,9 +15,11 @@ import java.util.Iterator;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -64,8 +67,12 @@ public class CeliLemmatizerEnhancementEn
ContentItem ci = wrapAsContentItem(TEXT);
try {
- CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
-
+ //add a simple triple to statically define the language of the test
+ //content
+ ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("fr")));
+ //unit test should not depend on each other (if possible)
+ //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
+
morphoAnalysisEngine.computeEnhancements(ci);
int textAnnoNum = checkAllTextAnnotations(ci.getMetadata(), TEXT);
log.info(textAnnoNum + " TextAnnotations found ...");
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/CeliNamedEntityExtractionEnhancementEngineTest.java Tue May 15 12:33:05 2012
@@ -18,7 +18,7 @@ import org.apache.stanbol.enhancer.conte
import org.apache.stanbol.enhancer.engines.celi.classification.impl.CeliClassificationEnhancementEngine;
import org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngineTest;
import org.apache.stanbol.enhancer.engines.celi.ner.impl.CeliNamedEntityExtractionEnhancementEngine;
-import org.apache.stanbol.enhancer.engines.celi.test_utils.MockComponentContext;
+import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/EnhancementRDFUtils.java Tue May 15 12:33:05 2012
@@ -63,6 +63,7 @@ public class EnhancementRDFUtils {
* @param entity
* the related entity
* @param nameField the field used to extract the name
+ * @param lang the preferred language to include
*/
public static UriRef writeEntityAnnotation(EnhancementEngine engine,
LiteralFactory literalFactory,
@@ -70,7 +71,8 @@ public class EnhancementRDFUtils {
UriRef contentItemId,
Collection<NonLiteral> relatedEnhancements,
Representation rep,
- String nameField) {
+ String nameField,
+ String lang) {
// 1. check if the returned Entity does has a label -> if not return null
// add labels (set only a single label. Use "en" if available!
Text label = null;
@@ -81,7 +83,7 @@ public class EnhancementRDFUtils {
label = actLabel;
} else {
//use startWith to match also en-GB and en-US ...
- if (actLabel.getLanguage() != null && actLabel.getLanguage().startsWith("en")) {
+ if (actLabel.getLanguage() != null && actLabel.getLanguage().startsWith(lang)) {
label = actLabel;
}
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntityTaggingEngine.java Tue May 15 12:33:05 2012
@@ -49,6 +49,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -61,6 +62,7 @@ import org.apache.stanbol.entityhub.serv
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.entityhub.servicesapi.query.Constraint;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.ReferenceConstraint;
@@ -126,6 +128,11 @@ public class NamedEntityTaggingEngine
@Property(intValue=0)
public static final String SERVICE_RANKING = Constants.SERVICE_RANKING;
+ /**
+ * The default language for labels included in the enhancement metadata
+ * (if not available for the parsed content).
+ */
+ private static final String DEFAULT_LANGUAGE = "en";
/**
* Service of the Entityhub that manages all the active referenced Site. This Service is used to lookup the
@@ -154,6 +161,7 @@ public class NamedEntityTaggingEngine
public static final Integer defaultOrder = ORDERING_EXTRACTION_ENHANCEMENT;
+
/**
* State if text annotations of type {@link OntologicalClasses#DBPEDIA_PERSON} are enhanced by this engine
*/
@@ -319,8 +327,11 @@ public class NamedEntityTaggingEngine
LiteralFactory literalFactory = LiteralFactory.getInstance();
// Retrieve the existing text annotations (requires read lock)
Map<NamedEntity,List<UriRef>> textAnnotations = new HashMap<NamedEntity,List<UriRef>>();
+ //the language extracted for the parsed content or NULL if not available
+ String contentLangauge;
ci.getLock().readLock().lock();
try {
+ contentLangauge = EnhancementEngineHelper.getLanguage(ci);
for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it
.hasNext();) {
UriRef uri = (UriRef) it.next().getSubject();
@@ -346,7 +357,7 @@ public class NamedEntityTaggingEngine
for (Entry<NamedEntity,List<UriRef>> entry : textAnnotations.entrySet()) {
try {
List<Entity> entitySuggestions = computeEntityRecommentations(
- site, entry.getKey(),entry.getValue());
+ site, entry.getKey(),entry.getValue(),contentLangauge);
if(entitySuggestions != null && !entitySuggestions.isEmpty()){
suggestions.put(entry.getKey(), entitySuggestions);
}
@@ -366,7 +377,10 @@ public class NamedEntityTaggingEngine
for(Entity suggestion : entitySuggestions.getValue()){
log.debug("Add Suggestion {} for {}", suggestion.getId(), entitySuggestions.getKey());
EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, graph, ci.getUri(),
- annotationsToRelate, suggestion.getRepresentation(), nameField);
+ annotationsToRelate, suggestion.getRepresentation(), nameField,
+ //TODO: maybe we want labels in a different language than the
+ // language of the content (e.g. Accept-Language header)?!
+ contentLangauge == null ? DEFAULT_LANGUAGE : contentLangauge);
if (dereferenceEntities) {
entityData.put(suggestion.getId(), suggestion.getRepresentation());
}
@@ -391,13 +405,15 @@ public class NamedEntityTaggingEngine
* @param contentItemId the id of the contentItem
* @param textAnnotation the text annotation to enhance
* @param subsumedAnnotations other text annotations for the same entity
+ * @param language the language of the analyzed text or <code>null</code>
+ * if not available.
* @return the suggested {@link Entity entities}
* @throws EntityhubException On any Error while looking up Entities via
* the Entityhub
*/
protected final List<Entity> computeEntityRecommentations(ReferencedSite site,
NamedEntity namedEntity,
- List<UriRef> subsumedAnnotations) throws EntityhubException {
+ List<UriRef> subsumedAnnotations, String language) throws EntityhubException {
// First get the required properties for the parsed textAnnotation
// ... and check the values
@@ -406,7 +422,16 @@ public class NamedEntityTaggingEngine
entityhub.getQueryFactory().createFieldQuery() :
site.getQueryFactory().createFieldQuery();
// replace spaces with plus to create an AND search for all words in the name!
- query.setConstraint(nameField, new TextConstraint(namedEntity.getName()));// name.replace(' ', '+')));
+ Constraint labelConstraint;
+ //TODO: make case sensitivity configurable
+ boolean casesensitive = false;
+ if(language != null){
+ //search labels in the language and without language
+ labelConstraint = new TextConstraint(namedEntity.getName(),casesensitive,language,null);
+ } else {
+ labelConstraint = new TextConstraint(namedEntity.getName(),casesensitive);
+ }
+ query.setConstraint(nameField, labelConstraint);
if (OntologicalClasses.DBPEDIA_PERSON.equals(namedEntity.getType())) {
if (personState) {
if (personType != null) {
@@ -457,7 +482,7 @@ public class NamedEntityTaggingEngine
boolean found = false;
while(labels.hasNext() && !found){
Text label = labels.next();
- if(label.getLanguage() == null || label.getLanguage().startsWith("en")){
+ if(label.getLanguage() == null || (language != null && label.getLanguage().startsWith(language))){
if(label.getText().equalsIgnoreCase(namedEntity.getName())){
found = true;
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Tue May 15 12:33:05 2012
@@ -463,32 +463,36 @@ public class KeywordLinkingEngine
}
}
/**
- * Extracts the language of the parsed ContentItem from the metadata
+ * Extracts the language of the parsed ContentItem by using
+ * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and "en" as
+ * default.
* @param ci the content item
* @return the language
*/
private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples =
- metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
- if(langaugeEnhancementCreatorTriples.hasNext()){
- String lang = EnhancementEngineHelper.getString(metadata,
- langaugeEnhancementCreatorTriples.next().getSubject(),
- Properties.DC_LANGUAGE);
- if(lang != null){
- return lang;
- } else {
- log.warn("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
- new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
- log.warn(" ... return 'en' as default");
- return "en";
- }
+ String lang = EnhancementEngineHelper.getLanguage(ci);
+// if(lang != null){
+// MGraph metadata = ci.getMetadata();
+// Iterator<Triple> langaugeEnhancementCreatorTriples =
+// metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+// if(langaugeEnhancementCreatorTriples.hasNext()){
+// String lang = EnhancementEngineHelper.getString(metadata,
+// langaugeEnhancementCreatorTriples.next().getSubject(),
+// Properties.DC_LANGUAGE);
+ if(lang != null){
+ return lang;
} else {
- log.warn("Unable to extract language for ContentItem %s! Is the %s active?",
- ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+ log.warn("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
+ new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
log.warn(" ... return 'en' as default");
return "en";
}
+// } else {
+// log.warn("Unable to extract language for ContentItem %s! Is the %s active?",
+// ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+// log.warn(" ... return 'en' as default");
+// return "en";
+// }
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/pom.xml Tue May 15 12:33:05 2012
@@ -113,6 +113,18 @@
</dependency>
<dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.test</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.enhancer.core</artifactId>
+ <version>0.10.0-incubating-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/main/java/org/apache/stanbol/enhancer/engines/langid/LangIdEnhancementEngine.java Tue May 15 12:33:05 2012
@@ -34,6 +34,7 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
@@ -73,7 +74,11 @@ public class LangIdEnhancementEngine
/**
* The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_PRE_PROCESSING}
+ * {@link ServiceProperties#ORDERING_PRE_PROCESSING} - 2<p>
+ * NOTE: this information is used by the default and weighted {@link Chain}
+ * implementations to determine the processing order of
+ * {@link EnhancementEngine}s. Other {@link Chain} implementations do not
+ * use this information.
*/
public static final Integer defaultOrder = ORDERING_PRE_PROCESSING - 2;
Added: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java?rev=1338669&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java Tue May 15 12:33:05 2012
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.langid;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
+import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateLanguageAnnotation;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.PlainLiteral;
+import org.apache.clerezza.rdf.core.Resource;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.apache.tika.language.LanguageIdentifier;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * {@link LangIdEngineTest} is a test class for {@link LangIdEnhancementEngine}.
+ *
+ * @author Joerg Steffen, DFKI
+ * @version $Id: LangIdTest.java 1145590 2011-07-12 13:26:39Z wkasper $
+ */
+public class LangIdEngineTest {
+
+ private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+
+ private static final String TEST_FILE_NAME = "en.txt";
+ /**
+ * This contains the text used for testing
+ */
+ private static String text;
+ /**
+ * This initializes the {@link LanguageIdentifier} profiles and loads the test text.
+ */
+ @BeforeClass
+ public static void oneTimeSetUp() throws IOException {
+ LanguageIdentifier.initProfiles();
+ InputStream in = LangIdEngineTest.class.getClassLoader().getResourceAsStream(
+ TEST_FILE_NAME);
+ assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+ text = IOUtils.toString(in);
+ }
+
+ /**
+ * Tests the language identification.
+ *
+ * @throws IOException if there is an error when reading the text
+ */
+ @Test
+ public void testLangId() throws IOException {
+ LanguageIdentifier tc = new LanguageIdentifier(text);
+ String language = tc.getLanguage();
+ assertEquals("en", language);
+ }
+
+ @Test
+ public void testEngine() throws EngineException, IOException, ConfigurationException {
+ LangIdEnhancementEngine langIdEngine = new LangIdEnhancementEngine();
+ ComponentContext context = new MockComponentContext();
+ context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langid");
+ langIdEngine.activate(context);
+ ContentItem ci = ciFactory.createContentItem(new StringSource(text));
+ langIdEngine.computeEnhancements(ci);
+ HashMap<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
+ expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
+ expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
+ langIdEngine.getClass().getName()));
+ PlainLiteral detectedLnaguage = validateLanguageAnnotation(ci.getMetadata(), text,expectedValues);
+ //even though this test does not validate service quality but rather
+ //the correct integration of the language identification as EnhancementEngine
+ //we expect that "en" is detected for the parsed text
+ assertEquals("The detected language for text '"+text+"' MUST BE 'en'",
+ "en",detectedLnaguage.getLexicalForm());
+ assertEquals("The value oft the returned language is not the expected one",
+ detectedLnaguage.getLexicalForm(),EnhancementEngineHelper.getLanguage(ci));
+
+ int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
+ assertEquals("No EntityAnnotations are expected",0, entityAnnoNum);
+
+ }
+}
Propchange: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/LangIdEngineTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java?rev=1338669&view=auto
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java (added)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java Tue May 15 12:33:05 2012
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.engines.langid;
+
+import java.util.Dictionary;
+import java.util.Hashtable;
+
+import org.osgi.framework.Bundle;
+import org.osgi.framework.BundleContext;
+import org.osgi.framework.ServiceReference;
+import org.osgi.service.component.ComponentContext;
+import org.osgi.service.component.ComponentInstance;
+
+public class MockComponentContext implements ComponentContext {
+
+ private final Dictionary properties = new Hashtable();
+
+ @Override
+ public Dictionary getProperties() {
+ return properties;
+ }
+
+ @Override
+ public Object locateService(String name) {
+ return null;
+ }
+
+ @Override
+ public Object locateService(String name, ServiceReference reference) {
+ return null;
+ }
+
+ @Override
+ public Object[] locateServices(String name) {
+ return null;
+ }
+
+ @Override
+ public BundleContext getBundleContext() {
+ return null;
+ }
+
+ @Override
+ public Bundle getUsingBundle() {
+ return null;
+ }
+
+ @Override
+ public ComponentInstance getComponentInstance() {
+ return null;
+ }
+
+ @Override
+ public void enableComponent(String name) {
+ }
+
+ @Override
+ public void disableComponent(String name) {
+ }
+
+ @Override
+ public ServiceReference getServiceReference() {
+ return null;
+ }
+
+}
Propchange: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/MockComponentContext.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/langid/src/test/java/org/apache/stanbol/enhancer/engines/langid/core/LangIdTest.java Tue May 15 12:33:05 2012
@@ -16,6 +16,7 @@
*/
package org.apache.stanbol.enhancer.engines.langid.core;
+import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
@@ -35,16 +36,21 @@ import org.junit.Test;
*/
public class LangIdTest {
+ private static final String TEST_FILE_NAME = "en.txt";
/**
- * This contains the text categorizer to test.
+ * This contains the text used for testing
*/
-
+ private static String text;
/**
* This initializes the text categorizer.
*/
@BeforeClass
public static void oneTimeSetUp() throws IOException {
LanguageIdentifier.initProfiles();
+ InputStream in = LangIdTest.class.getClassLoader().getResourceAsStream(
+ TEST_FILE_NAME);
+ assertNotNull("failed to load resource " + TEST_FILE_NAME, in);
+ text = IOUtils.toString(in);
}
/**
@@ -54,16 +60,8 @@ public class LangIdTest {
*/
@Test
public void testLangId() throws IOException {
- String testFileName = "en.txt";
-
- InputStream in = this.getClass().getClassLoader().getResourceAsStream(
- testFileName);
- assertNotNull("failed to load resource " + testFileName, in);
-
- String text = IOUtils.toString(in);
LanguageIdentifier tc = new LanguageIdentifier(text);
String language = tc.getLanguage();
assertEquals("en", language);
}
-
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/opencalais/src/main/java/org/apache/stanbol/enhancer/engines/opencalais/impl/OpenCalaisEngine.java Tue May 15 12:33:05 2012
@@ -257,7 +257,7 @@ public class OpenCalaisEngine
public int canEnhance(ContentItem ci) throws EngineException {
if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null){
- String language = getMetadataLanguage(ci.getMetadata(), null);
+ String language = EnhancementEngineHelper.getLanguage(ci);
if (language != null && !SUPPORTED_LANGUAGES.contains(language)) {
log.info("OpenCalais can not process ContentItem {} because "
+ "language {} is not supported (supported: {})",
@@ -320,7 +320,7 @@ public class OpenCalaisEngine
public void createEnhancements(Collection<CalaisEntityOccurrence> occs, ContentItem ci) {
LiteralFactory literalFactory = LiteralFactory.getInstance();
final Language language; // used for plain literals representing parts fo the content
- String langString = getMetadataLanguage(ci.getMetadata(), null);
+ String langString = EnhancementEngineHelper.getLanguage(ci);
if(langString != null && !langString.isEmpty()){
language = new Language(langString);
} else {
@@ -593,25 +593,6 @@ public class OpenCalaisEngine
urlConn.getInputStream(), responseEncoding);
}
- public String getMetadataLanguage(MGraph model, NonLiteral subj) {
- Iterator<Triple> it = model.filter(subj, DC_LANGUAGE, null);
- if (it.hasNext()) {
- Resource langNode = it.next().getObject();
- return getLexicalForm(langNode);
- }
- return null;
- }
-
- public String getLexicalForm(Resource res) {
- if (res == null) {
- return null;
- } else if (res instanceof Literal) {
- return ((Literal) res).getLexicalForm();
- } else {
- return res.toString();
- }
- }
-
/**
* The activate method.
*
Modified: incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java Tue May 15 12:33:05 2012
@@ -473,31 +473,34 @@ public class NEREngineCore implements En
*/
public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine");
/**
- * Extracts the language of the parsed ContentItem from the metadata
+ * Extracts the language of the parsed ContentItem by using
+ * {@link EnhancementEngineHelper#getLanguage(ContentItem)} and
+ * {@link #defaultLang} as default
* @param ci the content item
* @return the language
*/
private String extractLanguage(ContentItem ci) {
- MGraph metadata = ci.getMetadata();
- Iterator<Triple> langaugeEnhancementCreatorTriples =
- metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
- if(langaugeEnhancementCreatorTriples.hasNext()){
- String lang = EnhancementEngineHelper.getString(metadata,
- langaugeEnhancementCreatorTriples.next().getSubject(),
- Properties.DC_LANGUAGE);
- if(lang != null){
- return lang;
- } else {
- log.info("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
- new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
- log.info(" ... return '{}' as default",defaultLang);
- return defaultLang;
- }
+ String lang = EnhancementEngineHelper.getLanguage(ci);
+// MGraph metadata = ci.getMetadata();
+// Iterator<Triple> langaugeEnhancementCreatorTriples =
+// metadata.filter(null, Properties.DC_CREATOR, LANG_ID_ENGINE_NAME);
+// if(langaugeEnhancementCreatorTriples.hasNext()){
+// String lang = EnhancementEngineHelper.getString(metadata,
+// langaugeEnhancementCreatorTriples.next().getSubject(),
+// Properties.DC_LANGUAGE);
+ if(lang != null){
+ return lang;
} else {
- log.info("Unable to extract language for ContentItem {}! Is the {} active?",
- ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+ log.info("Unable to extract language for ContentItem %s! The Enhancement of the %s is missing the %s property",
+ new Object[]{ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm(),Properties.DC_LANGUAGE});
log.info(" ... return '{}' as default",defaultLang);
return defaultLang;
}
+// } else {
+// log.info("Unable to extract language for ContentItem {}! Is the {} active?",
+// ci.getUri().getUnicodeString(),LANG_ID_ENGINE_NAME.getLexicalForm());
+// log.info(" ... return '{}' as default",defaultLang);
+// return defaultLang;
+// }
}
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Tue May 15 12:33:05 2012
@@ -16,9 +16,18 @@
*/
package org.apache.stanbol.enhancer.servicesapi.helper;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.Random;
import java.util.UUID;
@@ -31,6 +40,7 @@ import org.apache.clerezza.rdf.core.Trip
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
@@ -42,9 +52,11 @@ import org.slf4j.LoggerFactory;
public class EnhancementEngineHelper {
- protected static Random rng = new Random();
+ protected final static Random rng = new Random();
+
+ private final static Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
- private static final Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
+ private final static LiteralFactory lf = LiteralFactory.getInstance();
public static void setSeed(long seed) {
rng.setSeed(seed);
@@ -398,4 +410,82 @@ public class EnhancementEngineHelper {
}
return ServiceProperties.ORDERING_DEFAULT;
}
+
+ /**
+ * Getter for the Resources of fise:TextAnnotations that do have a value
+ * of the dc:language property. The returned list is sorted by 'fise:confidence'.
+ * Annotations with missing confidence are ranked last.<p>
+ * NOTE that the returned list will likely contain several annotations for the same
+ * language if multiple language identification engines are used in the same {@link Chain}.
+ * @param graph the graph with the enhancement.
+ * Typically {@link ContentItem#getMetadata()}
+ * @return the sorted list of language annotations or an empty list if none.
+ * @throws IllegalArgumentException if <code>null</code> is parsed as graph
+ */
+ public static List<NonLiteral> getLanguageAnnotations(TripleCollection graph){
+ if(graph == null){
+ throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
+ }
+ // I do not use SPARQL, because I do not want to instantiate a QueryEngine
+ final Map<NonLiteral,Double> confidences = new HashMap<NonLiteral,Double>();
+ List<NonLiteral> langAnnotations = new ArrayList<NonLiteral>();
+ Iterator<Triple> textAnnoataions = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+ while(textAnnoataions.hasNext()){
+ NonLiteral textAnnotation = textAnnoataions.next().getSubject();
+ String language = getString(graph, textAnnotation, DC_LANGUAGE);
+ if(language != null){
+ Double confidence = get(graph, textAnnotation, Properties.ENHANCER_CONFIDENCE, Double.class, lf);
+ confidences.put(textAnnotation,confidence);
+ langAnnotations.add(textAnnotation);
+ }
+ }
+ if(langAnnotations.size() > 1){
+ Collections.sort(langAnnotations,new Comparator<NonLiteral>() {
+ @Override
+ public int compare(NonLiteral o1, NonLiteral o2) {
+ Double c1 = confidences.get(o1);
+ Double c2 = confidences.get(o2);
+ //decreasing order (values without confidence last)
+ if(c1 == null){
+ return c2 == null ? 0 : 1;
+ } else if(c2 == null){
+ return -1;
+ } else {
+ return c2.compareTo(c1);
+ }
+ }
+ });
+ }
+ return langAnnotations;
+ }
+ /**
+ * Getter for language identified for (extracted-from) the parsed
+ * ContentItem. The returned value is the Annotation with the highest
+ * 'fise:confidence' value - or if no annotations are present - the
+ * 'dc-terms:language' value of the {@link ContentItem#getUri()}.<p>
+ * Users that want to obtain all language annotations should use
+ * {@link #getLanguageAnnotations(TripleCollection)} instead.<p>
+ * This method acquires a read lock on the {@link ContentItem}.
+ * @param ci the contentItem
+ * @return the identified language of the parsed {@link ContentItem}.
+ * <code>null</code> if not available.
+ * @throws IllegalArgumentException if <code>null</code> is parsed as content item
+ * @see #getLanguageAnnotations(TripleCollection)
+ */
+ public static String getLanguage(ContentItem ci){
+ if(ci == null){
+ throw new IllegalArgumentException("The parsed ContentItem MUST NOT be NULL!");
+ }
+ ci.getLock().readLock().lock();
+ try {
+ List<NonLiteral> langAnnotations = getLanguageAnnotations(ci.getMetadata());
+ if(langAnnotations.isEmpty()){ //fallback
+ return getString(ci.getMetadata(), ci.getUri(), DC_LANGUAGE);
+ } else {
+ return getString(ci.getMetadata(), langAnnotations.get(0), DC_LANGUAGE);
+ }
+ } finally {
+ ci.getLock().readLock().unlock();
+ }
+ }
}
Modified: incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java?rev=1338669&r1=1338668&r2=1338669&view=diff
==============================================================================
--- incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java (original)
+++ incubator/stanbol/branches/celi-enhancement-engines/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java Tue May 15 12:33:05 2012
@@ -18,11 +18,15 @@ import static org.junit.Assert.assertTru
import java.util.Collections;
import java.util.Date;
+import java.util.HashMap;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
@@ -32,6 +36,7 @@ import org.apache.clerezza.rdf.core.UriR
import org.apache.clerezza.rdf.ontologies.DCTERMS;
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
@@ -379,5 +384,30 @@ public class EnhancementStructureHelper
assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
}
}
-
+ /**
+ * Validates that the passed graph contains exactly one fise:TextAnnotation
+ * describing the language of the content and that this annotation is also
+ * returned as first element by
+ * {@link EnhancementEngineHelper#getLanguageAnnotations(org.apache.clerezza.rdf.core.TripleCollection)}.<p>
+ * The following checks are performed:<ul>
+ * <li> exactly one resource with the rdf:type fise:TextAnnotation is present
+ * and this resource is an {@link UriRef}
+ * <li> the annotation passes the generic enhancement validation
+ * <li> the annotation has exactly one dc:language value that is a
+ * {@link PlainLiteral} with a lexical form of at least two chars
+ * </ul>
+ * @param g the graph containing the enhancement(s) to validate
+ * @param content the analysed content. Currently not used by this method
+ * @param expectedValues the expected values forwarded to the validation of
+ * the enhancement metadata
+ * @return the dc:language value of the validated language annotation
+ */
+ public static PlainLiteral validateLanguageAnnotation(MGraph g, String content,HashMap<UriRef,Resource> expectedValues) {
+ Iterator<Triple> textAnnotationIterator = g.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+ // test if a textAnnotation is present
+ assertTrue("The Language Annotation is missing!",textAnnotationIterator.hasNext());
+ NonLiteral annotation = textAnnotationIterator.next().getSubject();
+ assertTrue("TextAnnotations MUST BE URIs", annotation instanceof UriRef);
+ assertFalse("Only a single Language Annotation is expected!", textAnnotationIterator.hasNext());
+ //validate enhancement metadata (this also checks the confidence)
+ validateEnhancement(g, (UriRef)annotation, expectedValues);
+ //validate the dc:language value
+ Iterator<Triple> languageIterator = g.filter(annotation, Properties.DC_LANGUAGE, null);
+ assertTrue("The fise:TextAnnotation for the language MUST HAVE a value for dc:language!",languageIterator.hasNext());
+ Resource languageResource = languageIterator.next().getObject();
+ assertFalse("Only a single dc:language value MUST BE present!", languageIterator.hasNext());
+ assertTrue("The dc:language value MUST BE a plain literal",languageResource instanceof PlainLiteral);
+ assertTrue("The dc:language value MUST BE at least two chars long",
+ ((PlainLiteral)languageResource).getLexicalForm().length()>=2);
+ //assert that the created TextAnnotation is correctly returned by the
+ //EnhancementEngineHelper methods
+ List<NonLiteral> languageAnnotation = EnhancementEngineHelper.getLanguageAnnotations(g);
+ assertFalse("No language Annotation was extracted by the EnhancementEngineHelper#getLanguageAnnotations(..) method",
+ languageAnnotation.isEmpty());
+ //only a single annotation exists in the graph, so it has to be the first result
+ assertEquals("The returned language annotation was not the one created by this engine",
+ annotation, languageAnnotation.get(0));
+ return (PlainLiteral)languageResource;
+ }
}