You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/19 19:25:59 UTC

svn commit: r1458446 - in /stanbol/trunk/enhancer/generic/servicesapi/src: main/java/org/apache/stanbol/enhancer/servicesapi/helper/ main/java/org/apache/stanbol/enhancer/servicesapi/rdf/ main/resources/ test/java/org/apache/stanbol/enhancer/serviceapi...

Author: rwesten
Date: Tue Mar 19 18:25:59 2013
New Revision: 1458446

URL: http://svn.apache.org/r1458446
Log:
STANBOL-987: Added new properties to fise.owl and Properties.java; Added setOccurrence(..) helper to EnhancementEngineHelper; Added UnitTest for the new features

Modified:
    stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
    stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java
    stanbol/trunk/enhancer/generic/servicesapi/src/main/resources/fise.owl
    stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java

Modified: stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java?rev=1458446&r1=1458445&r2=1458446&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java (original)
+++ stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/helper/EnhancementEngineHelper.java Tue Mar 19 18:25:59 2013
@@ -17,9 +17,8 @@
 package org.apache.stanbol.enhancer.servicesapi.helper;
 
 import static java.util.Collections.singleton;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
-import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
+import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.*;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -33,6 +32,7 @@ import java.util.Map;
 import java.util.Random;
 import java.util.UUID;
 
+import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
@@ -42,13 +42,12 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.TypedLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.stanbol.enhancer.servicesapi.Chain;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
 import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
-import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
-import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -96,11 +95,70 @@ public class EnhancementEngineHelper {
                 EnhancementEngine engine, UriRef contentItemId){
         UriRef enhancement = createEnhancement(metadata, engine,contentItemId);
         //add the Text Annotation Type
-        metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
-                TechnicalClasses.ENHANCER_TEXTANNOTATION));
+        metadata.add(new TripleImpl(enhancement, RDF_TYPE,
+                ENHANCER_TEXTANNOTATION));
         return enhancement;
     }
     /**
+     * Sets the fise:start, fise:end, fise:selection-prefix and
+     * fise:selection-suffix properties and either fise:selected-text or the
+     * fise:selection-head/fise:selection-tail pair on the passed resource.<p>
+     * While it is intended to be used for TextAnnotations this method can also
+     * be used to add the mentioned properties to {@link UriRef}s with different
+     * type.<p>
+     * <b>NOTE</b> on <code>allowSelectionHeadTail</code>: This parameter allows
+     * to deactivate the usage of fise:selection-head and fise:selection-tail.
+     * Typically users should pass <code>false</code> in case of 'named entities'
+     * and <code>true</code> in case sections of the text (e.g. phrases, sentences,
+     * chapters ...) are selected.
+     * @param metadata the RDF graph the triples are added to
+     * @param textAnnotation the UriRef of the fise:TextAnnotation
+     * @param content the plain text content as String
+     * @param start the start index of the occurrence (inclusive; must not be <code>null</code>)
+     * @param end the end index of the occurrence (exclusive; must not be <code>null</code>)
+     * @param lang the language of the content or <code>null</code> if not known
+     * @param prefixSuffixSize the size of the prefix, suffix, head and tail. If
+     * the passed value is &lt; 3 then the default 10 is used.
+     * @param allowSelectionHeadTail if <code>true</code> the fise:selection-head
+     * and fise:selection-tail properties are used instead of fise:selected-text
+     * if the selected text is longer than <code>Math.max(30, prefixSuffixSize*5)</code>.
+     * If <code>false</code> the fise:selected-text is added regardless of the
+     * size of the selected area.
+     * @since 0.11.0
+     */
+    public static void setOccurrence(MGraph metadata, UriRef textAnnotation,
+            String content, Integer start, Integer end, Language lang, int prefixSuffixSize, 
+            boolean allowSelectionHeadTail){
+        //set start, end
+        metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, 
+            lf.createTypedLiteral(start)));
+        metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, 
+            lf.createTypedLiteral(end)));
+        //set selection prefix and suffix; sizes below 3 fall back to the default of 10
+        prefixSuffixSize = prefixSuffixSize < 3 ? 10 : prefixSuffixSize;
+        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_PREFIX, 
+            new PlainLiteralImpl(content.substring(
+                Math.max(0,start-prefixSuffixSize), start), lang)));
+        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_SUFFIX, 
+            new PlainLiteralImpl(content.substring(
+                end,Math.min(content.length(), end+prefixSuffixSize)),lang)));
+        //set the selected text (or alternatively head and tail)
+        int maxSelectedTextSize = Math.max(30, prefixSuffixSize*5);
+        if(!allowSelectionHeadTail || end-start <= maxSelectedTextSize){
+            metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, 
+                new PlainLiteralImpl(content.substring(start, end),lang)));
+        } else { //selected area too long for fise:selected-text
+            //use fise:selection-head and fise:selection-tail instead
+            metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_HEAD, 
+                new PlainLiteralImpl(content.substring(
+                    start,start+prefixSuffixSize),lang)));
+            metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_TAIL, 
+                new PlainLiteralImpl(content.substring(
+                    end-prefixSuffixSize,end),lang)));
+        }
+    }
+    
+    /**
      * Create a new instance with the types enhancer:Enhancement and
      * enhancer:EntityAnnotation in the metadata-graph of the content
      * item along with default properties (dc:creator and dc:created) and return
@@ -129,8 +187,7 @@ public class EnhancementEngineHelper {
     public static UriRef createEntityEnhancement(MGraph metadata,
                 EnhancementEngine engine, UriRef contentItemId){
         UriRef enhancement = createEnhancement(metadata, engine, contentItemId);
-        metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
-                TechnicalClasses.ENHANCER_ENTITYANNOTATION));
+        metadata.add(new TripleImpl(enhancement, RDF_TYPE, ENHANCER_ENTITYANNOTATION));
         return enhancement;
     }
     /**
@@ -148,8 +205,7 @@ public class EnhancementEngineHelper {
     public static UriRef createTopicEnhancement(MGraph metadata,
                  EnhancementEngine engine, UriRef contentItemId){
          UriRef enhancement = createEnhancement(metadata, engine, contentItemId);
-         metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
-                 TechnicalClasses.ENHANCER_TOPICANNOTATION));
+         metadata.add(new TripleImpl(enhancement, RDF_TYPE, ENHANCER_TOPICANNOTATION));
          return enhancement;
      }
     /**
@@ -183,13 +239,13 @@ public class EnhancementEngineHelper {
         UriRef enhancement = new UriRef("urn:enhancement-"
                 + EnhancementEngineHelper.randomUUID());
         //add the Enhancement Type
-        metadata.add(new TripleImpl(enhancement, Properties.RDF_TYPE,
-                TechnicalClasses.ENHANCER_ENHANCEMENT));
+        metadata.add(new TripleImpl(enhancement, RDF_TYPE,
+                ENHANCER_ENHANCEMENT));
         //add the extracted from content item
         metadata.add(new TripleImpl(enhancement,
-                Properties.ENHANCER_EXTRACTED_FROM, contentItemId));
+                ENHANCER_EXTRACTED_FROM, contentItemId));
         // creation date
-        metadata.add(new TripleImpl(enhancement, Properties.DC_CREATED,
+        metadata.add(new TripleImpl(enhancement, DC_CREATED,
                 literalFactory.createTypedLiteral(new Date())));
 
         // the engines that extracted the data
@@ -203,7 +259,7 @@ public class EnhancementEngineHelper {
          * We would need to add getEnhancerID() method to the enhancer interface
          * to access this information
           */
-        metadata.add(new TripleImpl(enhancement, Properties.DC_CREATOR,
+        metadata.add(new TripleImpl(enhancement, DC_CREATOR,
                 literalFactory.createTypedLiteral(engine.getClass().getName())));
         return enhancement;
     }
@@ -218,10 +274,10 @@ public class EnhancementEngineHelper {
                                              EnhancementEngine engine){
         LiteralFactory literalFactory = LiteralFactory.getInstance();
         // TODO: use a public dereferencing URI instead?
-        metadata.add(new TripleImpl(enhancement, Properties.DC_CONTRIBUTOR,
+        metadata.add(new TripleImpl(enhancement, DC_CONTRIBUTOR,
             literalFactory.createTypedLiteral(engine.getClass().getName())));
         //set the modification date to the current date.
-        set(metadata,enhancement,Properties.DC_MODIFIED,new Date(),literalFactory);
+        set(metadata,enhancement,DC_MODIFIED,new Date(),literalFactory);
     }
     
     /**
@@ -232,7 +288,7 @@ public class EnhancementEngineHelper {
      * @param ci the ContentItem being under analysis
      * @param engine the Engine performing the analysis
      * @return the URI of the new extraction instance
-     * @deprecated
+     * @deprecated will be removed with 1.0
      * @see EnhancementEngineHelper#createEntityEnhancement(ContentItem, EnhancementEngine)
      * @see EnhancementEngineHelper#createTextEnhancement(ContentItem, EnhancementEngine)
      */
@@ -245,22 +301,22 @@ public class EnhancementEngineHelper {
         UriRef extraction = new UriRef("urn:extraction-"
                 + EnhancementEngineHelper.randomUUID());
 
-        metadata.add(new TripleImpl(extraction, Properties.RDF_TYPE,
-                TechnicalClasses.ENHANCER_EXTRACTION));
+        metadata.add(new TripleImpl(extraction, RDF_TYPE,
+                ENHANCER_EXTRACTION));
 
         // relate the extraction to the content item
         metadata.add(new TripleImpl(extraction,
-                Properties.ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getUri().getUnicodeString())));
+                ENHANCER_RELATED_CONTENT_ITEM, new UriRef(ci.getUri().getUnicodeString())));
 
         // creation date
-        metadata.add(new TripleImpl(extraction, Properties.DC_CREATED,
+        metadata.add(new TripleImpl(extraction, DC_CREATED,
                 literalFactory.createTypedLiteral(new Date())));
 
         // the engines that extracted the data
         // TODO: add some kind of versioning info for the extractor?
         // TODO: use a public dereferencing URI instead? that would allow for
         // explicit versioning too
-        metadata.add(new TripleImpl(extraction, Properties.DC_CREATOR,
+        metadata.add(new TripleImpl(extraction, DC_CREATOR,
                 literalFactory.createTypedLiteral(engine.getClass().getName())));
 
         return extraction;
@@ -557,7 +613,7 @@ public class EnhancementEngineHelper {
             NonLiteral textAnnotation = textAnnoataions.next().getSubject();
             String language = getString(graph, textAnnotation, DC_LANGUAGE);
             if(language != null){
-                Double confidence = get(graph, textAnnotation, Properties.ENHANCER_CONFIDENCE, Double.class, lf);
+                Double confidence = get(graph, textAnnotation, ENHANCER_CONFIDENCE, Double.class, lf);
                 confidences.put(textAnnotation,confidence);
                 langAnnotations.add(textAnnotation);
             }

Modified: stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java?rev=1458446&r1=1458445&r2=1458446&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java (original)
+++ stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/rdf/Properties.java Tue Mar 19 18:25:59 2013
@@ -16,6 +16,7 @@
 */
 package org.apache.stanbol.enhancer.servicesapi.rdf;
 
+import org.apache.clerezza.rdf.core.PlainLiteral;
 import org.apache.clerezza.rdf.core.UriRef;
 
 /**
@@ -110,11 +111,14 @@ public class Properties {
      * and longitude) concatenated as a string literal using a whitespace as
      * separator.
      */
+    @Deprecated
     public static final UriRef GEORSS_POINT = new UriRef(NamespaceEnum.georss
             + "point");
 
+    @Deprecated
     public static final UriRef GEO_LAT = new UriRef(NamespaceEnum.geo + "lat");
 
+    @Deprecated
     public static final UriRef GEO_LONG = new UriRef(NamespaceEnum.geo + "long");
 
     public static final UriRef SKOS_BROADER = new UriRef(NamespaceEnum.skos + "broader");
@@ -150,6 +154,36 @@ public class Properties {
      */
     public static final UriRef ENHANCER_SELECTION_CONTEXT = new UriRef(
             NamespaceEnum.fise + "selection-context");
+    /**
+     * The prefix of the {@link #ENHANCER_SELECTED_TEXT}. Intended to be used
+     * to find the exact position within the text if char indexes can not be used
+     * @since 0.11.0
+     */
+    public final static UriRef ENHANCER_SELECTION_PREFIX = new UriRef(
+        NamespaceEnum.fise + "selection-prefix");
+    /**
+     * The first few chars of the {@link #ENHANCER_SELECTED_TEXT}. To be used if
+     * the selected text is too long to be included as a {@link PlainLiteral} (
+     * e.g. when selecting sentences or whole sections of the text).
+     * @since 0.11.0
+     */
+    public final static UriRef ENHANCER_SELECTION_HEAD = new UriRef(
+        NamespaceEnum.fise + "selection-head");
+    /**
+     * The last few chars of the {@link #ENHANCER_SELECTED_TEXT}. To be used if
+     * the selected text is too long to be included as a {@link PlainLiteral} (
+     * e.g. when selecting sentences or whole sections of the text).
+     * @since 0.11.0
+     */
+    public final static UriRef ENHANCER_SELECTION_TAIL = new UriRef(
+        NamespaceEnum.fise + "selection-tail");
+    /**
+     * The suffix of the {@link #ENHANCER_SELECTED_TEXT}. Intended to be used
+     * to find the exact position within the text if char indexes can not be used
+     * @since 0.11.0
+     */
+    public final static UriRef ENHANCER_SELECTION_SUFFIX = new UriRef(
+        NamespaceEnum.fise + "selection-suffix");
 
     /**
      * A positive double value to rank extractions according to the algorithm
@@ -212,6 +246,7 @@ public class Properties {
     /**
      * The sha1 hexadecimal digest of a content item.
      */
+    @Deprecated
     public static final UriRef FOAF_SHA1 = new UriRef(NamespaceEnum.foaf
             + "sha1");
 

Modified: stanbol/trunk/enhancer/generic/servicesapi/src/main/resources/fise.owl
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/servicesapi/src/main/resources/fise.owl?rev=1458446&r1=1458445&r2=1458446&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/servicesapi/src/main/resources/fise.owl (original)
+++ stanbol/trunk/enhancer/generic/servicesapi/src/main/resources/fise.owl Tue Mar 19 18:25:59 2013
@@ -98,6 +98,40 @@
     <rdfs:comment xml:lang="en">the selected text</rdfs:comment>
     <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#DatatypeProperty"/>
   </owl:FunctionalProperty>
+  <owl:FunctionalProperty rdf:about="http://fise.iks-project.eu/ontology/selection-prefix">
+    <rdfs:domain rdf:resource="http://fise.iks-project.eu/ontology/TextAnnotation"/>
+    <rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string"/>
+    <rdfs:label xml:lang="en">selection prefix</rdfs:label>
+    <rdfs:comment xml:lang="en">Some chars in front of the fise:selected-text</rdfs:comment>
+    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#DatatypeProperty"/>
+  </owl:FunctionalProperty>
+  <owl:FunctionalProperty rdf:about="http://fise.iks-project.eu/ontology/selection-suffix">
+    <rdfs:domain rdf:resource="http://fise.iks-project.eu/ontology/TextAnnotation"/>
+    <rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string"/>
+    <rdfs:label xml:lang="en">selection suffix</rdfs:label>
+    <rdfs:comment xml:lang="en">Some chars following the fise:selected-text</rdfs:comment>
+    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#DatatypeProperty"/>
+  </owl:FunctionalProperty>
+  <owl:FunctionalProperty rdf:about="http://fise.iks-project.eu/ontology/selection-head">
+    <rdfs:domain rdf:resource="http://fise.iks-project.eu/ontology/TextAnnotation"/>
+    <rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string"/>
+    <rdfs:label xml:lang="en">selection head</rdfs:label>
+    <rdfs:comment xml:lang="en">
+      The first few chars of the fise:selected-text. Typically used if the selected-text
+      is too long for inclusion as an RDF literal.
+    </rdfs:comment>
+    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#DatatypeProperty"/>
+  </owl:FunctionalProperty>
+  <owl:FunctionalProperty rdf:about="http://fise.iks-project.eu/ontology/selection-tail">
+    <rdfs:domain rdf:resource="http://fise.iks-project.eu/ontology/TextAnnotation"/>
+    <rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string"/>
+    <rdfs:label xml:lang="en">selection tail</rdfs:label>
+    <rdfs:comment xml:lang="en">
+      The last few chars of the fise:selected-text. Typically used if the selected-text
+      is too long for inclusion as an RDF literal.
+    </rdfs:comment>
+    <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#DatatypeProperty"/>
+  </owl:FunctionalProperty>
   <owl:FunctionalProperty rdf:about="http://fise.iks-project.eu/ontology/entity-reference">
     <rdfs:domain rdf:resource="http://fise.iks-project.eu/ontology/EntityAnnotation"/>
     <rdf:type rdf:resource="http://www.w3.org/2002/07/owl#ObjectProperty"/>

Modified: stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java?rev=1458446&r1=1458445&r2=1458446&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java (original)
+++ stanbol/trunk/enhancer/generic/servicesapi/src/test/java/org/apache/stanbol/enhancer/serviceapi/helper/EnhancementEngineHelperTest.java Tue Mar 19 18:25:59 2013
@@ -16,9 +16,104 @@
 */
 package org.apache.stanbol.enhancer.serviceapi.helper;
 
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.EngineException;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
+import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
+import org.junit.Assert;
+import org.junit.Test;
+
 
 public class EnhancementEngineHelperTest {
 
-//TODO:
+    private static final LiteralFactory lf = LiteralFactory.getInstance();
+    
+    /**
+     * internally used as argument for {@link EnhancementEngineHelper} methods
+     */
+    private static final EnhancementEngine dummyEngine = new EnhancementEngine(){
+
+        @Override
+        public int canEnhance(ContentItem ci) throws EngineException {
+            return 0;
+        }
+    
+        @Override
+        public void computeEnhancements(ContentItem ci) throws EngineException {
+        }
+    
+        @Override
+        public String getName() {
+            return "DummyEngine";
+        }
+        
+    };
+    
+    /** Checks setOccurrence(..) for a short selection, a long selection and a long selection with head/tail disabled. */
+    @Test
+    public void testTextAnnotationNewModel(){
+        String content = "The Stanbol Enhancer can extract Entities form parsed Text.";
+        Language lang = new Language("en");
+        int start = content.indexOf("Stanbol");
+        int end = start+"Stanbol Enhancer".length();
+        UriRef ciUri = new UriRef("http://www.example.org/contentItem#1");
+        MGraph metadata = new IndexedMGraph();
+        UriRef ta = EnhancementEngineHelper.createTextEnhancement(metadata, dummyEngine, ciUri);
+        EnhancementEngineHelper.setOccurrence(metadata, ta, content, start, end, lang, -1, true);
+        Assert.assertEquals("The ", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_PREFIX));
+        Assert.assertEquals("Stanbol Enhancer", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTED_TEXT));
+        Assert.assertEquals(" can extra", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_SUFFIX));
+        Assert.assertEquals(Integer.valueOf(start), EnhancementEngineHelper.get(
+            metadata, ta, Properties.ENHANCER_START, Integer.class, lf));
+        Assert.assertEquals(Integer.valueOf(end), EnhancementEngineHelper.get(
+            metadata, ta, Properties.ENHANCER_END, Integer.class, lf));
+        //short selection: neither fise:selection-head nor fise:selection-tail must be set
+        Assert.assertNull(EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_HEAD));
+        Assert.assertNull(EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_TAIL));
+        
+        content = "Ich habe den Schlüssel fürs Donaudampfschiffahrtsgesellschaftskapitänskajütenschloss verlohren.";
+        start = content.indexOf("Donaudampfschi");
+        end = content.indexOf(" verlohren");
+        ta = EnhancementEngineHelper.createTextEnhancement(metadata, dummyEngine, ciUri);
+        EnhancementEngineHelper.setOccurrence(metadata, ta, content, start, end, lang, -1, true);
+        Assert.assertEquals("ssel fürs ", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_PREFIX));
+        Assert.assertEquals(" verlohren", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_SUFFIX));
+        Assert.assertEquals(Integer.valueOf(start), EnhancementEngineHelper.get(
+            metadata, ta, Properties.ENHANCER_START, Integer.class, lf));
+        Assert.assertEquals(Integer.valueOf(end), EnhancementEngineHelper.get(
+            metadata, ta, Properties.ENHANCER_END, Integer.class, lf));
+        //selection is too long: fise:selected-text is expected to be absent
+        Assert.assertNull(EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTED_TEXT));
+        //fise:selection-head and fise:selection-tail must be present instead
+        Assert.assertEquals("Donaudampf", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_HEAD));
+        Assert.assertEquals("tenschloss", EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_TAIL));
+        
+        //finally the same occurrence, but with head/tail deactivated
+        ta = EnhancementEngineHelper.createTextEnhancement(metadata, dummyEngine, ciUri);
+        EnhancementEngineHelper.setOccurrence(metadata, ta, content, start, end, lang, -1, false);
+        Assert.assertEquals("Donaudampfschiffahrtsgesellschaftskapitänskajütenschloss", 
+            EnhancementEngineHelper.getString(metadata, ta,Properties.ENHANCER_SELECTED_TEXT));
+        Assert.assertNull(EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_HEAD));
+        Assert.assertNull(EnhancementEngineHelper.getString(
+            metadata, ta,Properties.ENHANCER_SELECTION_TAIL));
+    }
+    
 
 }