You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/05/17 13:21:55 UTC

svn commit: r1339554 - in /incubator/stanbol/trunk/enhancer/generic: servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/ servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/ test/src/main/java/org/apache/stanbol/...

Author: rwesten
Date: Thu May 17 11:21:54 2012
New Revision: 1339554

URL: http://svn.apache.org/viewvc?rev=1339554&view=rev
Log:
STANBOL-613

* merged changes related to STANBOL-613 from the CELI enhancement engine branch back to trunk
* added dc:LinguisticSystem to TechnicalClasses (as it now has to be used as dc:type on fise:TextAnnotations that describe the language of the Text)
* incorporated validation of text annotations into the std. validateTextAnnotation method

STANBOL-617

* added validation methods for fise:TopicAnnotations to the EnhancementStructureHelper

general:

* added special tests for TextAnnotations used to describe extracted NamedEntities (because those require a fise:selected-text while TextAnnotations used for topics or language annotations do not).

Modified:
    incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
    incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java
    incubator/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1339554&r1=1339553&r2=1339554&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Thu May 17 11:21:54 2012
@@ -16,9 +16,18 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+
+import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 import java.util.Random;
 import java.util.UUID;
 
@@ -31,6 +40,7 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
@@ -42,9 +52,11 @@ import org.slf4j.LoggerFactory;
 
 public class EnhancementEngineHelper {
 
-    protected static Random rng = new Random();
+    protected final static Random rng = new Random();
+
+    private final static Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
 
-    private static final Logger log = LoggerFactory.getLogger(EnhancementEngineHelper.class);
+    private final static LiteralFactory lf = LiteralFactory.getInstance();
 
     public static void setSeed(long seed) {
         rng.setSeed(seed);
@@ -119,6 +131,39 @@ public class EnhancementEngineHelper {
         return enhancement;
     }
     /**
+     * Create a new instance with the types enhancer:Enhancement and
+     * enhancer:TopicAnnotation in the parsed graph along with default properties
+     * (dc:creator, dc:created and enhancer:extracted-form) and return
+     * the UriRef of the extraction so that engines can further add.
+     *
+     * @param metadata the graph
+     * @param engine the engine
+     * @param contentItemId the id
+     *
+     * @return the URI of the new enhancement instance
+     */
+    public static UriRef createTopicEnhancement(MGraph metadata,
+                 EnhancementEngine engine, UriRef contentItemId){
+         UriRef enhancement = createEnhancement(metadata, engine, contentItemId);
+         metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
+                 TechnicalClasses.ENHANCER_TOPICANNOTATION));
+         return enhancement;
+     }
+    /**
+     * Create a new instance with the types enhancer:Enhancement and
+     * enhancer:TopicAnnotation in the metadata-graph of the content
+     * item along with default properties (dc:creator and dc:created) and return
+     * the UriRef of the extraction so that engines can further add
+     *
+     * @param ci the ContentItem being under analysis
+     * @param engine the Engine performing the analysis
+     * @return the URI of the new enhancement instance
+     */
+    public static UriRef createTopicEnhancement(ContentItem ci,
+            EnhancementEngine engine){
+        return createTopicEnhancement(ci.getMetadata(), engine, new UriRef(ci.getUri().getUnicodeString()));
+    }
+    /**
      * Create a new enhancement instance in the metadata-graph of the content
      * item along with default properties (dc:creator and dc:created) and return
      * the UriRef of the extraction so that engines can further add.
@@ -398,4 +443,82 @@ public class EnhancementEngineHelper {
         }
         return ServiceProperties.ORDERING_DEFAULT;
     }
+    
+    /**
+     * Getter for the Resources of fise:TextAnnotations that do have a value 
+     * of the dc:language property. The returned list is sorted by 'fise:confidence'.
+     * Annotations with missing confidence are ranked last.<p>
+     * NOTE that the returned list will likely contain annotations for the same language
+     * if multiple language identification are used in the same {@link Chain}.
+     * @param graph the graph with the enhancement. 
+     * Typically {@link ContentItem#getMetadata()}
+     * @return the sorted list of language annotations or an empty list if none.
+     * @throws IllegalArgumentException if <code>null</code> is parsed as graph
+     */
+    public static List<NonLiteral> getLanguageAnnotations(TripleCollection graph){
+        if(graph == null){
+            throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
+        }
+        // I do not use SPARQL, because I do not want to instantiate a QueryEngine
+        final Map<NonLiteral,Double> confidences = new HashMap<NonLiteral,Double>();
+        List<NonLiteral> langAnnotations = new ArrayList<NonLiteral>();
+        Iterator<Triple> textAnnoataions = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+        while(textAnnoataions.hasNext()){
+            NonLiteral textAnnotation = textAnnoataions.next().getSubject();
+            String language = getString(graph, textAnnotation, DC_LANGUAGE);
+            if(language != null){
+                Double confidence = get(graph, textAnnotation, Properties.ENHANCER_CONFIDENCE, Double.class, lf);
+                confidences.put(textAnnotation,confidence);
+                langAnnotations.add(textAnnotation);
+            }
+        }
+        if(langAnnotations.size() > 1){
+            Collections.sort(langAnnotations,new Comparator<NonLiteral>() {
+                @Override
+                public int compare(NonLiteral o1, NonLiteral o2) {
+                    Double c1 = confidences.get(o1);
+                    Double c2 = confidences.get(o2);
+                    //decreasing order (values without confidence last)
+                    if(c1 == null){
+                        return c2 == null ? 0 : 1;
+                    } else if(c2 == null){
+                        return -1;
+                    } else {
+                        return c2.compareTo(c1);
+                    }
+                }
+            });
+        }
+        return langAnnotations;
+    }
+    /**
+     * Getter for language identified for (extracted-from) the parsed
+     * ContentItem. The returned value is the Annotation with the highest
+     * 'fise:confidence' value - or if no annotations are present - the
+     * 'dc-terms:language' value of the {@link ContentItem#getUri()}.<p>
+     * Users that want to obtain all language annotations should use
+     * {@link #getLanguageAnnotations(TripleCollection)} instead.<p>
+     * This method acquires a read lock on the {@link ContentItem}.
+     * @param ci the contentItem
+     * @return the identified language of the parsed {@link ContentItem}.
+     * <code>null</code> if not available.
+     * @throws IllegalArgumentException if <code>null</code> is parsed as content item
+     * @see #getLanguageAnnotations(TripleCollection)
+     */
+    public static String getLanguage(ContentItem ci){
+        if(ci == null){
+            throw new IllegalArgumentException("The parsed ContentItem MUST NOT be NULL!");
+        }
+        ci.getLock().readLock().lock();
+        try {
+            List<NonLiteral> langAnnotations = getLanguageAnnotations(ci.getMetadata());
+            if(langAnnotations.isEmpty()){ //fallback
+                return getString(ci.getMetadata(), ci.getUri(), DC_LANGUAGE);
+            } else {
+                return getString(ci.getMetadata(), langAnnotations.get(0), DC_LANGUAGE);
+            }
+        } finally {
+            ci.getLock().readLock().unlock();
+        }
+    }
 }

Modified: incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java?rev=1339554&r1=1339553&r2=1339554&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/TechnicalClasses.java Thu May 17 11:21:54 2012
@@ -24,8 +24,9 @@ import org.apache.clerezza.rdf.core.UriR
  *
  * @author ogrisel
  */
-public class TechnicalClasses {
-
+public final class TechnicalClasses {
+    
+    private TechnicalClasses() {}
     /**
      * Type used for all enhancement created by Stanbol Enhancer
      */
@@ -87,11 +88,22 @@ public class TechnicalClasses {
      * Used to indicate, that an EntityAnnotation describes an Categorisation.
      * see <a href="http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories">
      * Mapping of Categories</a> for more Information)
+     * @deprecated the preferred rdf:type for categories and topics is
+     * {@link OntologicalClasses#SKOS_CONCEPT} (see 
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>)
      */
     public static final UriRef ENHANCER_CATEGORY = new UriRef(
             NamespaceEnum.fise + "Category");
 
-    private TechnicalClasses() {
-    }
+    /**
+     * DC terms Linguistic System is the type used as Range for the dc:language
+     * property. As this property is also used for describing the language
+     * as identified for analysed content this type is used as dc:type for
+     * {@value #ENHANCER_TEXTANNOTATION} describing the language of the text
+     * (see 
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a>)
+     */
+    public static final UriRef DCTERMS_LINGUISTIC_SYSTEM = new UriRef(
+            NamespaceEnum.dc + "LinguisticSystem");
 
 }

Modified: incubator/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java?rev=1339554&r1=1339553&r2=1339554&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java (original)
+++ incubator/stanbol/trunk/enhancer/generic/test/src/main/java/org/apache/stanbol/enhancer/test/helper/EnhancementStructureHelper.java Thu May 17 11:21:54 2012
@@ -1,6 +1,11 @@
 package org.apache.stanbol.enhancer.test.helper;
 
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_ORGANISATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PERSON;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PLACE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
@@ -8,8 +13,11 @@ import static org.apache.stanbol.enhance
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENHANCEMENT;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TOPICANNOTATION;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -18,11 +26,15 @@ import static org.junit.Assert.assertTru
 
 import java.util.Collections;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.PlainLiteral;
 import org.apache.clerezza.rdf.core.Resource;
 import org.apache.clerezza.rdf.core.Triple;
@@ -32,6 +44,8 @@ import org.apache.clerezza.rdf.core.UriR
 import org.apache.clerezza.rdf.ontologies.DCTERMS;
 import org.apache.clerezza.rdf.ontologies.XSD;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 
@@ -53,7 +67,8 @@ public class EnhancementStructureHelper 
         Iterator<Triple> textAnnotationIterator = enhancements.filter(null,
                 RDF_TYPE, ENHANCER_TEXTANNOTATION);
         // test if a textAnnotation is present
-        assertTrue(textAnnotationIterator.hasNext());
+        //assertTrue(textAnnotationIterator.hasNext()); 
+        //  -> this might be used to test that there are no TextAnnotations
         int textAnnotationCount = 0;
         while (textAnnotationIterator.hasNext()) {
             UriRef textAnnotation = (UriRef) textAnnotationIterator.next().getSubject();
@@ -77,37 +92,52 @@ public class EnhancementStructureHelper 
      * @param expectedValues expected values (properties for the values are used as keys)
      */
     public static void validateTextAnnotation(TripleCollection enhancements, UriRef textAnnotation, String content, Map<UriRef,Resource> expectedValues) {
+        //validate the rdf:type
+        Iterator<Triple> rdfTypeIterator = enhancements.filter(textAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION);
+        assertTrue("Parsed Enhancement "+textAnnotation +" is missing the fise:TextAnnotation type ",
+            rdfTypeIterator.hasNext());
         Iterator<Triple> selectedTextIterator = enhancements.filter(textAnnotation,
                 ENHANCER_SELECTED_TEXT, null);
-        // check if the selected text is added
-        assertTrue("TextAnnotations MUST have a fise:selected-text value",selectedTextIterator.hasNext());
-        // test if the selected text is part of the TEXT_TO_TEST
-        Resource selectedTextResource = selectedTextIterator.next().getObject();
-        assertTrue("fise:selected-text MUST BE of type PlainLiteral",selectedTextResource instanceof PlainLiteral);
-        Literal selectedText = (Literal)selectedTextResource;
-        assertTrue("The parsed content MUST contain the fise:selected-text value '"
-            +selectedText.getLexicalForm()+"'!",content.contains(selectedText.getLexicalForm()));
+        // check if the selected text is added (or not)
+        Resource selectedTextResource;
+        if(selectedTextIterator.hasNext()){
+            // test if the selected text is part of the TEXT_TO_TEST
+            selectedTextResource = selectedTextIterator.next().getObject();
+            assertTrue("fise:selected-text MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
+                selectedTextResource instanceof PlainLiteral);
+            Literal selectedText = (Literal)selectedTextResource;
+            assertTrue("The parsed content MUST contain the fise:selected-text value '"
+                +selectedText.getLexicalForm()+"' (uri: "+textAnnotation+")!",content.contains(selectedText.getLexicalForm()));
+        } else {
+            selectedTextResource = null; //no selected text
+        }
+        //check against an expected value
         Resource expectedSelectedText = expectedValues.get(ENHANCER_SELECTED_TEXT);
         if(expectedSelectedText != null){
-            assertEquals("The fise:selected-text is not the expected value "+expectedSelectedText+"!",
-                expectedSelectedText, selectedText);
+            assertEquals("The fise:selected-text is not the expected value "+expectedSelectedText+" (uri: "+textAnnotation+")!",
+                expectedSelectedText, selectedTextResource);
         }
         Resource selectionContextResource;
         // test if context is added
         Iterator<Triple> selectionContextIterator = enhancements.filter(textAnnotation,
                 ENHANCER_SELECTION_CONTEXT, null);
         if(selectionContextIterator.hasNext()) { //context is optional
+            //selection context is not allowed without selected-text
+            assertNotNull("If fise:selection-context is present also fise:selected-text MUST BE present (uri: "+textAnnotation+")",
+                selectedTextResource);
             // test if the selected text is part of the TEXT_TO_TEST
             selectionContextResource = selectionContextIterator.next().getObject();
-            assertTrue("The fise:selection-context MUST BE of type PlainLiteral",selectionContextResource instanceof PlainLiteral);
+            assertTrue("The fise:selection-context MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
+                selectionContextResource instanceof PlainLiteral);
             //check that the content contains the context
             assertTrue("The fise:selection-context MUST BE contained in the Content | context= "+ selectionContextResource,
             content.contains(((Literal)selectionContextResource).getLexicalForm()));
             //check that the context contains the selected text
             assertTrue("The fise:selected-text value MUST BE containted within the fise:selection-context value",
                 ((Literal)selectionContextResource).getLexicalForm().contains(
-                    selectedText.getLexicalForm()));
+                    ((Literal)selectedTextResource).getLexicalForm()));
         } else {
+            assertNull("If no fise:selection-context is present also fise:selected-text MUST BE NOT present!", selectedTextResource);
             selectionContextResource = null;
         }
         Resource expectedSelectionContext = expectedValues.get(ENHANCER_SELECTION_CONTEXT);
@@ -124,36 +154,42 @@ public class EnhancementStructureHelper 
         TypedLiteral startPosLiteral;
         TypedLiteral endPosLiteral;
         if(startPosIterator.hasNext()){
-            assertNotNull("If fise:start is present the fise:selection-context MUST also be present!",
-                selectionContextResource);
+            //NOTE: TextAnnotations might be use to select whole sections of a text
+            //      (e.g. see STANBOL-617) in those cases adding the text of the
+            //      whole section is not feasible.
+            //assertNotNull("If fise:start is present the fise:selection-context MUST also be present (uri: "+textAnnotation+")!",
+            //    selectionContextResource);
             Resource resource = startPosIterator.next().getObject();
             //only a single start position is supported
-            assertFalse("fise:start MUST HAVE only a single value!",startPosIterator.hasNext());
-            assertTrue("fise:start MUST be a typed Literal!",resource instanceof TypedLiteral);
+            assertFalse("fise:start MUST HAVE only a single value (uri: "+textAnnotation+")!",startPosIterator.hasNext());
+            assertTrue("fise:start MUST be a typed Literal (uri: "+textAnnotation+")!",resource instanceof TypedLiteral);
             startPosLiteral = (TypedLiteral) resource;
-            assertEquals("fise:start MUST use xsd:int as data type",XSD.int_, startPosLiteral.getDataType());
+            assertEquals("fise:start MUST use xsd:int as data type (uri: "+textAnnotation+")",XSD.int_, startPosLiteral.getDataType());
             resource = null;
             Integer start = LiteralFactory.getInstance().createObject(Integer.class, startPosLiteral);
             assertNotNull("Unable to parse Integer from TypedLiteral "+startPosLiteral,start);
             //now get the end
             //end must be defined if start is present
-            assertTrue("If fise:start is present also fise:end MUST BE defined!",endPosIterator.hasNext());
+            assertTrue("If fise:start is present also fise:end MUST BE defined (uri: "+textAnnotation+")!",endPosIterator.hasNext());
             resource = endPosIterator.next().getObject();
             //only a single end position is supported
-            assertFalse("fise:end MUST HAVE only a single value!",endPosIterator.hasNext());
-            assertTrue("fise:end values MUST BE TypedLiterals",resource instanceof TypedLiteral);
+            assertFalse("fise:end MUST HAVE only a single value (uri: "+textAnnotation+")!",endPosIterator.hasNext());
+            assertTrue("fise:end values MUST BE TypedLiterals (uri: "+textAnnotation+")",resource instanceof TypedLiteral);
             endPosLiteral = (TypedLiteral) resource;
-            assertEquals("fise:end MUST use xsd:int as data type",XSD.int_, endPosLiteral.getDataType());
+            assertEquals("fise:end MUST use xsd:int as data type (uri: "+textAnnotation+")",XSD.int_, endPosLiteral.getDataType());
             resource = null;
             Integer end = LiteralFactory.getInstance().createObject(Integer.class, endPosLiteral);
             assertNotNull("Unable to parse Integer from TypedLiteral "+endPosLiteral,end);
             endPosLiteral = null;
             //check for equality of the selected text and the text on the selected position in the content
             //System.out.println("TA ["+start+"|"+end+"]"+selectedText.getLexicalForm()+"<->"+content.substring(start,end));
-            assertEquals("the substring [fise:start,fise:end] does not correspond to "
-                + "the fise:selected-text value '"+selectedText.getLexicalForm()
-                + "' of this TextAnnotation!",content.substring(start, end), selectedText.getLexicalForm());
+            if(selectedTextResource != null){
+                assertEquals("the substring [fise:start,fise:end] does not correspond to "
+                    + "the fise:selected-text value '"+((Literal)selectedTextResource).getLexicalForm()
+                    + "' of this TextAnnotation!",content.substring(start, end), ((Literal)selectedTextResource).getLexicalForm());
+            } // else no selected-text present ... unable to test this
         } else {
+            assertNull("if fise:selected-text is present also fise:start AND fise:end MUST BE present!",selectedTextResource);
             assertNull("If fise:selection-context is present also fise:start AND fise:end MUST BE present!",selectionContextResource);
             assertFalse("if fise:end is presnet also fise:start AND fise:selection-context MUST BE present!",endPosIterator.hasNext());
             startPosLiteral = null;
@@ -172,8 +208,76 @@ public class EnhancementStructureHelper 
         
         //validate fise:Enhancement specific rules
         validateEnhancement(enhancements, textAnnotation, expectedValues);
+        
+        //validate for special TextAnnotations
+        validateLanguageAnnotations(enhancements,textAnnotation);
+        validateNERAnnotations(enhancements,textAnnotation, selectedTextResource);
+    }
+    /**
+     * Validates the correctness of fise:TextAnnotations that annotate the language 
+     * of the text as defined by 
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a><p>
+     * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
+     * @param enhancements
+     * @param textAnnotation
+     */
+    private static void validateLanguageAnnotations(TripleCollection enhancements, UriRef textAnnotation) {
+        Iterator<Triple> dcLanguageIterator = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
+        if(dcLanguageIterator.hasNext()){ //a language annotation
+            Resource dcLanguageResource = dcLanguageIterator.next().getObject();
+            assertTrue("The dc:language value MUST BE a PlainLiteral", dcLanguageResource instanceof PlainLiteral);
+            assertTrue("The dc:language value '"+dcLanguageResource+"'MUST BE at least two chars long", 
+                ((Literal)dcLanguageResource).getLexicalForm().length() >=2);
+            assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri "
+                    +textAnnotation+")",dcLanguageIterator.hasNext());
+
+            Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+            assertTrue("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
+                +textAnnotation+")", dcTypeIterator.hasNext());
+            assertEquals("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
+                +textAnnotation+")", DCTERMS_LINGUISTIC_SYSTEM,dcTypeIterator.next().getObject());
+            assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri "
+                +textAnnotation+")",dcTypeIterator.hasNext());
+            //assert that the created TextAnnotation is correctly returned by the
+            //EnhancementEngineHelper methods
+            List<NonLiteral> languageAnnotation = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
+            assertTrue("Language annotation "+textAnnotation+" was not returned by "
+                +"EnhancementEngineHelper.getLanguageAnnotations(..)!",languageAnnotation.contains(textAnnotation));
+        } else { //no language annotation
+            Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+            while(dcTypeIterator.hasNext()){
+                assertFalse("Only fise:TextAnnotations without a dc:language value MUST NOT use the "
+                    + "dc:type value dc:LinguisticSystem (uri "+textAnnotation+")",
+                    DCTERMS_LINGUISTIC_SYSTEM.equals(dcTypeIterator.next().getObject()));
+            }
+        }
+        
+    }
+    /**
+     * Validates that fise:TextAnnotations with the dc:type dbp-ont:Person,
+     * dbp-ont:Organisation and dbp-ont:Place do have a
+     * fise:selected-text value (this implicitly also checks that
+     * fise:selection-context, fise:start and fise:end are defined!<p>
+     * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
+     * @param enhancements
+     * @param textAnnotation
+     * @param selectedTextResource the fise:selected-text value
+     */
+    private static void validateNERAnnotations(TripleCollection enhancements, UriRef textAnnotation, Resource selectedTextResource) {
+        Iterator<Triple> dcTypeIterator = enhancements.filter(textAnnotation, DC_TYPE, null);
+        boolean isNERAnnotation = false;
+        while(dcTypeIterator.hasNext() && !isNERAnnotation){
+            Resource dcTypeValue = dcTypeIterator.next().getObject();
+            isNERAnnotation = DBPEDIA_PERSON.equals(dcTypeValue) ||
+                    DBPEDIA_ORGANISATION.equals(dcTypeValue) ||
+                    DBPEDIA_PLACE.equals(dcTypeValue);
+        }
+        if(isNERAnnotation){
+            assertNotNull("fise:TextAnnotations with a dc:type of c:type dbp-ont:Person, "
+                +"dbp-ont:Organisation or dbp-ont:Place MUST have a fise:selected-text value (uri "
+                    +textAnnotation+")", selectedTextResource);
+        }
     }
-    
     /**
      * Validates all fise:EntityAnnotations contained by the parsed enhancements
      * graph.
@@ -212,11 +316,7 @@ public class EnhancementStructureHelper 
         Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(
                 entityAnnotation, DC_RELATION, null);
         // check if the relation to the text annotation is set
-        //TODO: currently it is not required that all EntityAnnotations are linked to
-        //      an TextAnnotation, because EntityAnnotations are also used for 
-        //      Topics (that do not explicitly occur in texts.
-        //      This might change as soon there is an own Topic type!
-        //assertTrue(relationToTextAnnotationIterator.hasNext());
+        assertTrue(relationToTextAnnotationIterator.hasNext());
         while (relationToTextAnnotationIterator.hasNext()) {
             // test if the referred annotations are text annotations
             UriRef referredTextAnnotation = (UriRef) relationToTextAnnotationIterator.next().getObject();
@@ -280,6 +380,10 @@ public class EnhancementStructureHelper 
      * @param expectedValues expected values (properties for the values are used as keys)
      */
     public static void validateEnhancement(TripleCollection enhancements, UriRef enhancement, Map<UriRef,Resource> expectedValues){
+        //validate the rdf:type
+        Iterator<Triple> rdfTypeIterator = enhancements.filter(enhancement, RDF_TYPE, ENHANCER_ENHANCEMENT);
+        assertTrue("Parsed Enhancement "+enhancement +" is missing the fise:Enhancement type ",
+            rdfTypeIterator.hasNext());
         //validate the creator
         Iterator<Triple> creatorIterator = enhancements.filter(enhancement, Properties.DC_CREATOR, null);
         assertTrue("Enhancements MUST HAVE a creator",creatorIterator.hasNext());
@@ -379,5 +483,107 @@ public class EnhancementStructureHelper 
             assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
         }
     }
+    /**
+     * Validates all fise:TopicAnnotations contained by the parsed enhancements
+     * graph. Each one is checked by {@link #validateTopicAnnotation(TripleCollection, UriRef, Map)}.
+     * @param enhancements the enhancement graph
+     * @param expectedValues the expected values of all validated TopicAnnotations.
+     * Properties are used as keys. Typical example would be fise:extracted-from
+     * with the id of the ContentItem as value; dc-terms:creator with the
+     * {@link Class#getName()} as value. <code>null</code> is treated as an empty map.
+     * @return the number of found and validated TopicAnnotations.
+     */
+    public static int validateAllTopicAnnotations(TripleCollection enhancements,Map<UriRef,Resource> expectedValues) {
+        // the typed emptyMap() replaces the raw EMPTY_MAP, so no unchecked warning needs to be suppressed
+        expectedValues = expectedValues == null ? Collections.<UriRef,Resource>emptyMap() : expectedValues;
+        Iterator<Triple> topicAnnotationIterator = enhancements.filter(null,
+                RDF_TYPE, ENHANCER_TOPICANNOTATION);
+        int topicAnnotationCount = 0;
+        while (topicAnnotationIterator.hasNext()) {
+            UriRef topicAnnotation = (UriRef) topicAnnotationIterator.next().getSubject();
+            // validate the TopicAnnotation against the expected values
+            validateTopicAnnotation(enhancements, topicAnnotation, 
+                expectedValues);
+            topicAnnotationCount++;
+        }
+        return topicAnnotationCount;
+    }
+    
+    /**
+     * Checks if a fise:TopicAnnotation is valid as defined by 
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>. 
+     * Required are the rdf:type fise:TopicAnnotation, a dc:relation to a fise:TextAnnotation
+     * and a fise:entity-label; fise:entity-reference and fise:entity-type are optional.
+     * NOTE that this also validates all fise:Enhancement related requirements by 
+     * calling {@link #validateEnhancement(TripleCollection, UriRef, Map)}
+     * @param enhancements the enhancements graph
+     * @param topicAnnotation the topic annotation to validate
+     * @param expectedValues expected values (properties are used as keys); MUST NOT be <code>null</code>
+     */
+    public static void validateTopicAnnotation(TripleCollection enhancements, UriRef topicAnnotation, Map<UriRef,Resource> expectedValues){
+        //validate the rdf:type
+        Iterator<Triple> rdfTypeIterator = enhancements.filter(topicAnnotation, RDF_TYPE, ENHANCER_TOPICANNOTATION);
+        assertTrue("Parsed Enhancement "+topicAnnotation +" is missing the fise:TopicAnnotation type ",
+            rdfTypeIterator.hasNext());
+        
+        //TopicAnnotations need to be linked to TextAnnotations describing the
+        //section of the text that has a specific Topic. If the topic is for the
+        //whole text the TextAnnotation will have no selected-text value
+        Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(topicAnnotation, DC_RELATION, null);
+        // check if the relation to the text annotation is set
+        assertTrue("TopicAnnotation "+topicAnnotation+" MUST HAVE a dc:relation", relationToTextAnnotationIterator.hasNext());
+        while (relationToTextAnnotationIterator.hasNext()) {
+            // the value is type checked before the cast, so a non-URI value fails the assertion
+            Resource referredTextAnnotation = relationToTextAnnotationIterator.next().getObject();
+            assertTrue("dc:relation value "+referredTextAnnotation+" of TopicAnnotation "+topicAnnotation
+                +" MUST BE a fise:TextAnnotation", referredTextAnnotation instanceof UriRef &&
+                enhancements.filter((UriRef)referredTextAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext());
+        }
+    
+        // test if an entity (the topic) is referred (NOTE: in contrast to
+        // fise:EntityAnnotations this property is NOT required - cardinality [0..*])
+        Iterator<Triple> entityReferenceIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_REFERENCE, null);
+        Resource expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
+        while(entityReferenceIterator.hasNext()){ //check possible multiple references
+            Resource entityReferenceResource = entityReferenceIterator.next().getObject();
+            // test if the reference is an URI
+            assertTrue("fise:entity-reference values MUST BE URIs",entityReferenceResource instanceof UriRef);
+            if(expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)){
+                expectedReferencedEntity = null; //found
+            }
+        }
+        assertNull("TopicAnnotation "+topicAnnotation+" fise:entity-reference has not the expected value "
+                +expectedReferencedEntity+"!", expectedReferencedEntity);
+        
+        //test if the entity label is set (required)
+        Iterator<Triple> entityLabelIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_LABEL, null);
+        assertTrue("TopicAnnotation "+topicAnnotation+" MUST HAVE a fise:entity-label", entityLabelIterator.hasNext());
+        Resource expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
+        while(entityLabelIterator.hasNext()){
+            Resource entityLabelResource =  entityLabelIterator.next().getObject();
+            assertTrue("fise:entity-label values MUST BE PlainLiterals (TopicAnnotation: "+topicAnnotation+")!",
+                entityLabelResource instanceof PlainLiteral);
+            if(expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)){
+                expectedEntityLabel = null; //found
+            }
+        }
+        assertNull("The expected EntityLabel "+expectedEntityLabel+" was not found", expectedEntityLabel);
+        
+        // test fise:entity-type(s). NOTE: this is not required - cardinality [0..*].
+        // All values are checked, as the expected type need not be the first one returned
+        Iterator<Triple> entityTypeIterator = enhancements.filter(topicAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
+        Resource expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
+        while(entityTypeIterator.hasNext()){
+            Resource entityTypeResource = entityTypeIterator.next().getObject();
+            assertTrue("fise:entity-type values MUST BE URIs",entityTypeResource instanceof UriRef);
+            if(expectedEntityType != null && expectedEntityType.equals(entityTypeResource)){
+                expectedEntityType = null; //found
+            }
+        }
+        assertNull("The expected fise:entity-type value "+expectedEntityType+" was not found!", expectedEntityType);
+        
+        //test all properties required by fise:Enhancement
+        validateEnhancement(enhancements, topicAnnotation, expectedValues);
+    }
     
 }