You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by re...@apache.org on 2013/10/18 19:58:28 UTC

svn commit: r1533571 [3/7] - in /stanbol/branches/commons-ng: ./ commons/solr/ commons/solr/core/ commons/solr/core/src/license/ commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/ commons/solr/core/src/main/java/org/apache/stanbol/commons...

Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java Fri Oct 18 17:58:24 2013
@@ -22,6 +22,7 @@ import static org.apache.stanbol.enhance
 import org.apache.clerezza.rdf.core.NonLiteral;
 import org.apache.clerezza.rdf.core.TripleCollection;
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.lang.StringUtils;
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.slf4j.Logger;
@@ -79,26 +80,33 @@ public final class NamedEntity {
      * text annotation is missing required information.
      */
     public static NamedEntity createFromTextAnnotation(TripleCollection graph, NonLiteral textAnnotation){
-        String name = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
-        if (name == null) {
+        String selected = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
+        if (selected == null) {
             log.debug("Unable to create NamedEntity for TextAnnotation {} "
                     + "because property {} is not present",textAnnotation,ENHANCER_SELECTED_TEXT);
             return null;
         }
-        name = name.trim();
+        String name = selected.trim();
         if(name.isEmpty()){
             log.debug("Unable to process TextAnnotation {} because its selects "
             		+ "an empty Stirng !",textAnnotation);
             return null;
         }
+        // remove punctuation from the search string
+        name = cleanupKeywords(name);
+        if(name.isEmpty()){
+            log.debug("Unable to process TextAnnotation {} because its selects "
+                    + "an stirng with punktations only (selected: {})!",
+                    textAnnotation, selected);
+            return null;
+        }
         UriRef type = EnhancementEngineHelper.getReference(graph, textAnnotation, DC_TYPE);
         if (type == null) {
             log.warn("Unable to process TextAnnotation {} because property {}"
                      + " is not present!",textAnnotation, DC_TYPE);
             return null;
         }
-        // remove punctuation form the search string
-        return new NamedEntity(textAnnotation,cleanupKeywords(name),type);
+        return new NamedEntity(textAnnotation,name,type);
     }        
     /**
      * Removes punctuation form a parsed string

Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java Fri Oct 18 17:58:24 2013
@@ -54,11 +54,12 @@ class MockEntityhub implements Entityhub
 
     private static final Logger log = LoggerFactory.getLogger(MockEntityhub.class);
     
+    public static final String TEST_SOLR_CORE_CONFIGURATION = "dbpedia_26k.solrindex.bz2";
     protected SolrYard yard;
     
     protected MockEntityhub(){
         SolrYardConfig config = new SolrYardConfig("dbpedia", "dbpedia");
-        config.setIndexConfigurationName("dbpedia_43k");//use dbpedia default data for initialisation
+        config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION);
         config.setAllowInitialisation(true);
         IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation());
         SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer(

Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java Fri Oct 18 17:58:24 2013
@@ -63,6 +63,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
+import org.apache.stanbol.entityhub.servicesapi.Entityhub;
 import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
 import org.junit.After;
 import org.junit.AfterClass;
@@ -78,30 +79,31 @@ public class TestEntityLinkingEnhancemen
     
     private static final Logger log = LoggerFactory.getLogger(TestEntityLinkingEnhancementEngine.class);
     
+    public static final String CONTEXT = "In March 2009, Condoleezza Rice returned "
+            +"to Stanford University near Palo Alto.";
+    
+    //The old text replaced by STANBOL-1163
+//    public static final String CONTEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
+//        + " geologist who lived in New Zealand and worked at the University of Otago.";
     /**
-     * The context for the tests (same as in TestOpenNLPEnhancementEngine)
-     */
-    public static final String CONTEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
-        + " geologist who lived in New Zealand and worked at the University of Otago.";
-    /**
-     * The person for the tests (same as in TestOpenNLPEnhancementEngine)
+     * The person for the tests 
      */
-    public static final String PERSON = "Patrick Marshall";
+    public static final String PERSON = ", Condoleezza Rice";
     /**
      * The organisation for the tests (same as in TestOpenNLPEnhancementEngine)
      */
-    public static final String ORGANISATION ="University of Otago";
+    public static final String ORGANISATION ="Stanford University";
     /**
      * The place for the tests (same as in TestOpenNLPEnhancementEngine)
      */
-    public static final String PLACE = "New Zealand";
+    public static final String PLACE = "Palo Alto";
 
     private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
     
-    static NamedEntityTaggingEngine entityLinkingEngine;
-
     private static String userDir = System.getProperty("user.dir");
     
+    private static Entityhub entityhub;
+    
     @BeforeClass
     public static void setUpServices() throws IOException {
         //TODO: set user.dir to /target/test-files
@@ -114,36 +116,47 @@ public class TestEntityLinkingEnhancemen
         String testRootDir = testFiles.getCanonicalPath();
         log.info("Test 'user.dir' folder {}",testRootDir);
         System.getProperties().setProperty("user.dir", testRootDir);
-        entityLinkingEngine = new NamedEntityTaggingEngine();
+        entityhub = new MockEntityhub();
+    }
+
+    @AfterClass
+    public static void shutdownServices() {
+        System.getProperties().setProperty("user.dir", userDir);
+    }
+
+    protected NamedEntityTaggingEngine initEngine(boolean person, boolean organisation, boolean place){
+        NamedEntityTaggingEngine entityLinkingEngine = new NamedEntityTaggingEngine();
         //instead of calling activate we directly set the required fields
         //we need a data source for linking
-        entityLinkingEngine.entityhub = new MockEntityhub();
-        entityLinkingEngine.personState = true;
+        entityLinkingEngine.entityhub = entityhub;
+        entityLinkingEngine.personState = person;
         entityLinkingEngine.personType = OntologicalClasses.DBPEDIA_PERSON.getUnicodeString();
-        entityLinkingEngine.orgState = true;
+        entityLinkingEngine.orgState = organisation;
         entityLinkingEngine.orgType = OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString();
-        entityLinkingEngine.placeState = true;
+        entityLinkingEngine.placeState = place;
         entityLinkingEngine.placeType = OntologicalClasses.DBPEDIA_PLACE.getUnicodeString();
         entityLinkingEngine.nameField = Properties.RDFS_LABEL.getUnicodeString();
         //not implemented
         entityLinkingEngine.dereferenceEntities = false;
+        return entityLinkingEngine;
     }
-
-    @Before
-    public void bindServices() throws IOException {
-    }
-
-    @After
-    public void unbindServices() {
-    }
-
-    @AfterClass
-    public static void shutdownServices() {
-        System.getProperties().setProperty("user.dir", userDir);
-    }
-
-    public static ContentItem getContentItem(final String id, final String text) throws IOException {
-        return ciFactory.createContentItem(new UriRef(id),new StringSource(text));
+    /**
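+     * Creates and initialises a new content item using {@link #CONTEXT} as
+     * content, with the three test text annotations and the language added.
+     * @return the initialised content item
+     * @throws IOException if creating the content item fails
+     */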
+    private ContentItem initContentItem() throws IOException {
+        ContentItem ci = ciFactory.createContentItem(
+            new UriRef("urn:iks-project:enhancer:text:content-item:person"),
+            new StringSource(CONTEXT));
+        //add three text annotations to be consumed by this test
+        getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
+        getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
+        getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
+        //add the language
+        ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
+        return ci;
     }
 
     public static void getTextAnnotation(ContentItem ci, String name,String context,UriRef type){
@@ -174,20 +187,48 @@ public class TestEntityLinkingEnhancemen
     @Test
     public void testEntityLinkingEnhancementEngine() throws Exception{
         //create a content item
-        ContentItem ci = getContentItem("urn:iks-project:enhancer:text:content-item:person", CONTEXT);
-        //add three text annotations to be consumed by this test
-        getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
-        getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
-        getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
-        //add the language
-        ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
+        ContentItem ci = initContentItem();
+        NamedEntityTaggingEngine entityLinkingEngine = initEngine(true, true, true);
         //perform the computation of the enhancements
         entityLinkingEngine.computeEnhancements(ci);
-        int entityAnnotationCount = validateAllEntityAnnotations(ci);
-        assertEquals(4, entityAnnotationCount);
+        int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+        assertEquals(3, entityAnnotationCount);
+    }
+
+    @Test
+    public void testPersonLinking() throws Exception{
+        //create a content item
+        ContentItem ci = initContentItem();
+        NamedEntityTaggingEngine entityLinkingEngine = initEngine(true, false, false);
+        //perform the computation of the enhancements
+        entityLinkingEngine.computeEnhancements(ci);
+        int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+        assertEquals(1, entityAnnotationCount);
+    }
+
+    @Test
+    public void testOrganizationLinking() throws Exception{
+        //create a content item
+        ContentItem ci = initContentItem();
+        NamedEntityTaggingEngine entityLinkingEngine = initEngine(false, true, false);
+        //perform the computation of the enhancements
+        entityLinkingEngine.computeEnhancements(ci);
+        int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+        assertEquals(1, entityAnnotationCount);
     }
     
-    private static int validateAllEntityAnnotations(ContentItem ci){
+    @Test
+    public void testLocationLinking() throws Exception{
+        //create a content item
+        ContentItem ci = initContentItem();
+        NamedEntityTaggingEngine entityLinkingEngine = initEngine(false, false, true);
+        //perform the computation of the enhancements
+        entityLinkingEngine.computeEnhancements(ci);
+        int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+        assertEquals(1, entityAnnotationCount);
+    }
+
+    private static int validateAllEntityAnnotations(NamedEntityTaggingEngine entityLinkingEngine, ContentItem ci){
         Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
         expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
         expectedValues.put(DC_CREATOR,LiteralFactory.getInstance().createTypedLiteral(

Modified: stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -13,13 +14,13 @@
 # - GNU Lesser General Public License (LGPL), Version 2.1
 # - ICU License
 # - MIT License
+# - New BSD License
 # - Public Domain License
 #-------------------------------------------------------------------------------
 # Please fill the missing licenses for dependencies :
 #
 #
-#Sun Oct 07 18:21:31 CEST 2012
+#Tue Jul 23 16:38:34 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml Fri Oct 18 17:58:24 2013
@@ -65,6 +65,9 @@
           <excludes>
             <!-- AL20 License -->
             <exclude>src/license/THIRD-PARTY.properties</exclude>
+
+            <!-- Config -->
+            <exclude>src/main/resources/nostoptags.txt</exclude>
           </excludes>
         </configuration>
       </plugin>

Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.kuromoji;
 
 import org.apache.lucene.analysis.ja.util.ToStringUtil;

Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java Fri Oct 18 17:58:24 2013
@@ -115,7 +115,7 @@ import org.slf4j.LoggerFactory;
 })
 public class KuromojiNlpEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements ServiceProperties {
 
-    private static final Version LUCENE_VERSION = Version.LUCENE_41;
+    private static final Version LUCENE_VERSION = Version.LUCENE_44;
     private static final String TOKENIZER_MODE = "search"; //normal, extended
     private static final Map<String,Object> SERVICE_PROPERTIES;
     private static final Map<String,String> TOKENIZER_FACTORY_CONFIG = new HashMap<String,String>();
@@ -361,25 +361,17 @@ public class KuromojiNlpEngine extends A
         //and third the parentResourceLoader (if present).
         resourceLoader = new StanbolResourceLoader(KuromojiNlpEngine.class.getClassLoader(), 
             new StanbolResourceLoader(parentResourceLoader));
-        tokenizerFactory = new JapaneseTokenizerFactory();
-        tokenizerFactory.init(TOKENIZER_FACTORY_CONFIG);
-        tokenizerFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG);
         ((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader);
         //base form filter
-        TokenFilterFactory baseFormFilterFactory =  new JapaneseBaseFormFilterFactory();
-        baseFormFilterFactory.init(BASE_FORM_FILTER_CONFIG);
-        baseFormFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        TokenFilterFactory baseFormFilterFactory =  new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG);
         filterFactories.add(baseFormFilterFactory);
         //POS filter
-        TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory();
-        posFilterFactory.init(POS_FILTER_CONFIG);
-        posFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG);
         ((ResourceLoaderAware) posFilterFactory).inform(resourceLoader);
         filterFactories.add(posFilterFactory);
         //Stemming
-        TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory();
-        stemmFilterFactory.init(STEMM_FILTER_CONFIG);
-        stemmFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+        TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG);
         filterFactories.add(stemmFilterFactory);
     }
     

Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.kuromoji.impl;
 
 import org.apache.stanbol.enhancer.nlp.ner.NerTag;

Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.kuromoji.impl;
 
 import java.io.IOException;

Propchange: stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Fri Oct 18 17:58:24 2013
@@ -0,0 +1,7 @@
+target
+
+.project
+
+.settings
+
+.classpath

Modified: stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml?rev=1533571&r1=1533530&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml Fri Oct 18 17:58:24 2013
@@ -22,13 +22,13 @@
   <parent>
     <groupId>org.apache.stanbol</groupId>
     <artifactId>apache-stanbol-enhancement-engines</artifactId>
-    <version>0.10.1-SNAPSHOT</version>
+    <version>1.0.0-SNAPSHOT</version>
     <relativePath>..</relativePath>
   </parent>
 
   <groupId>org.apache.stanbol</groupId>
   <artifactId>org.apache.stanbol.enhancer.engines.lucenefstlinking</artifactId>
-  <version>0.10.1-SNAPSHOT</version>
+  <version>1.0.0-SNAPSHOT</version>
   <packaging>bundle</packaging>
 
   <name>Apache Stanbol Enhancement Engine : Lucene FST Entity Linking</name>
@@ -62,7 +62,7 @@
           <instructions>
             <Import-Package>
               !org.mitre.solr.tagger.*,
-              org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,0.12)",
+              org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,1.1)",
               org.apache.stanbol.enhancer.engines.entitylinking;version=${project.version}; provide:=true,
               *
             </Import-Package>
@@ -93,30 +93,30 @@
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
-      <version>0.11.0</version>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.solr.core</artifactId>
-      <version>0.12.0-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>  
   
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.namespaceprefix.service</artifactId>
-      <version>0.11.0</version>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
 
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
-      <version>0.11.0-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
 
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.engines.entitylinking.engine</artifactId>
-      <version>0.10.1-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
     </dependency>
 
     <dependency>
@@ -142,14 +142,14 @@
      <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.core</artifactId>
-      <version>0.11.0-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
     <!-- the SolrYard with the dbpedia default dataset is used for testing -->
     <dependency>
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.entityhub.yard.solr</artifactId>
-      <version>0.12.0-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -161,13 +161,13 @@
     <dependency><!-- dbpedia default data do use ICU Tokenizer -->
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.commons.solr.extras.icu</artifactId>
-      <version>0.12.0-SNAPSHOT</version>
+      <version>1.0.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
     <dependency> <!-- required to read the test data (merkel_nlp.json) -->
       <groupId>org.apache.stanbol</groupId>
       <artifactId>org.apache.stanbol.enhancer.nlp.json</artifactId>
-      <version>0.10.0</version>
+      <version>1.0.0-SNAPSHOT</version>
       <scope>test</scope>
     </dependency>
   </dependencies>

Modified: stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -28,9 +28,9 @@
 #
 #Thu Feb 07 13:45:25 CET 2013
 com.drewnoakes--metadata-extractor--2.4.0-beta1.bundle=The Apache Software License, Version 2.0
-com.sun.xml.bind--jaxb-impl--2.1.9.bundle=CDDL v1.1
+com.sun.xml.bind--jaxb-impl--2.1.9.bundle=Common Development And Distribution License (CDDL), Version 1.1
 dom4j--dom4j--1.6.1=BSD style license
-javax.xml.bind--jaxb-api--2.1.9.v200905050702_orbit=CDDL v1.1
+javax.xml.bind--jaxb-api--2.1.9.v200905050702_orbit=Common Development And Distribution License (CDDL), Version 1.1
 mp3agic--mp3agic--0.6=MIT License
 net.fortuna.ical4j--ical4j-vcard--0.9.3.ant20100406=iCal4j - License
 net.sourceforge--htmlcleaner--2_1p=BSD License

Modified: stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 14:07:48 CET 2013
+#Tue Jul 23 16:38:27 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/
------------------------------------------------------------------------------
  Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker:r1496360-1533530

Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 13:55:20 CET 2013
+#Tue Jul 23 16:38:25 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/
------------------------------------------------------------------------------
  Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-ner:r1496360-1533530

Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -13,13 +14,13 @@
 # - GNU Lesser General Public License (LGPL), Version 2.1
 # - ICU License
 # - MIT License
+# - New BSD License
 # - Public Domain License
 #-------------------------------------------------------------------------------
 # Please fill the missing licenses for dependencies :
 #
 #
-#Sun Oct 07 16:31:16 CEST 2012
+#Tue Jul 23 16:38:25 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/
------------------------------------------------------------------------------
  Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-pos:r1496360-1533530

Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 13:46:06 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/
------------------------------------------------------------------------------
  Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-sentence:r1496360-1533530

Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 13:46:00 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/
------------------------------------------------------------------------------
  Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-token:r1496360-1533530

Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
 # Already used licenses in project :
 # - Apache Software License
 # - Apache Software License, Version 2.0
+# - BSD 3-Clause License
 # - BSD License
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 13:46:03 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
 javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0

Modified: stanbol/branches/commons-ng/enhancement-engines/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/pom.xml Fri Oct 18 17:58:24 2013
@@ -77,10 +77,14 @@
     <module>entitylinking</module>
     <module>entityhublinking</module>
     <module>entitytagging</module>
+    <!-- fast EntityLinking using Lucene FST -->
+    <module>lucenefstlinking</module> <!-- see STANBOL-1128 -->
     <!-- deprecated -->
     <module>keywordextraction</module>
+    
 
 	  <!-- Categorization -->
+    <module>topic/api</module>
     <module>topic/engine</module>
     <module>topic/web</module>
 
@@ -106,7 +110,6 @@
     <module>geonames</module> <!-- http://geonames.org -->
     <module>opencalais</module> <!-- http://opencalais.com/ -->
     <module>zemanta</module> <!-- htt://zemanta.com -->
-  
   </modules>
 
   <build>

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.sentiment.summarize;
 
 import java.util.ArrayList;
@@ -8,7 +24,6 @@ import java.util.Set;
 
 import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
-import org.apache.stanbol.enhancer.nlp.model.Section;
 import org.apache.stanbol.enhancer.nlp.model.Sentence;
 import org.apache.stanbol.enhancer.nlp.model.Token;
 import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
@@ -16,11 +31,15 @@ import org.apache.stanbol.enhancer.nlp.p
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 
 /**
- * This class is used to allow adding negations to sentiments even if the
- * sentiment was already assigned to an SentimentInfo. In addition this class
- * stores the token for the sentiment AND the tokens causing the negations. No
- * support for multiple negations - meaning that the sentiment value is inverted
- * if 1..* negations are present.
+ * This class is used to represent a {@link Token} that holds a Sentiment in the
+ * context of a {@link Sentence}. Sentiment might be {@link #addNegate(Token) negated}
+ * and be {@link #addAbout(Token) assigned} to a Noun or Pronoun via a
+ * {@link #getVerb() Verb}. The {@link #getStart()} and {@link #getEnd()} values
+ * return the span selected by this Sentiment. These are the lowest start and
+ * highest end values of any token related with this sentiment. Those spans are
+ * used by the {@link SentimentPhrase} class for clustering {@link Sentiment}s
+ * to phrases.
+ * 
  * @author Rupert Westenthaler
  *
  */
@@ -37,16 +56,47 @@ public class Sentiment {
      * {@link #PREF_LEX_CAT}.
      */
     private static final Set<LexicalCategory> PREF_LEX_CAT = EnumSet.of(LexicalCategory.Adjective);
-    
+    /**
+     * The token holding the sentiment
+     */
     private final Token token;
+    /**
+     * The (not negated) value of the sentiment
+     */
     private final double value;
+    /**
+     * The Sentence of the {@link #token}
+     */
     private final Sentence sentence;
+    /**
+     * List of tokens that negate this sentiment. <code>null</code> if no
+     * negation was added
+     */
     private List<Token> negated;
+    /**
+     * The Nouns and/or Pronouns this sentiment is about. <code>null</code> if
+     * no aboutness is defined
+     */
     private List<Token> aboutness;
-    private PosTag posTag;
+    /**
+     * The PosTag of the {@link #token}
+     */
+    private final PosTag posTag;
 
+    /**
+     * The start position of this sentiment. This is the lowest start of any
+     * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
+     */
     private int start;
+    /**
+     * The end position of this sentiment. This is the highest end of any
+     * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
+     */
     private int end;
+    /**
+     * The verb assigning this sentiment to the Nouns and/or Pronouns added
+     * by {@link #addAbout(Token)}.
+     */
     private Token verb;
     
     /**
@@ -63,26 +113,33 @@ public class Sentiment {
         this.start = token.getStart();
         this.end = token.getEnd();
         List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION);
-        for(Value<PosTag> tag : tags){
-            if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
-                    tag.probability() >= MIN_POS_CONF || 
-                    !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
-                posTag = tag.value();
-                break;
+        PosTag posTag = null;
+        if(tags != null && !tags.isEmpty()){
+            for(Value<PosTag> tag : tags){
+                if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
+                        tag.probability() >= MIN_POS_CONF || 
+                        !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
+                    posTag = tag.value();
+                    break;
+                }
+            }
+            if(posTag == null){
+                posTag = tags.get(0).value();
+            }
+            if(posTag.hasCategory(LexicalCategory.Noun)){
+                addAbout(token); //add the token also as noun
+            }
+            if(posTag.hasCategory(LexicalCategory.Verb)){
+                setVerb(token);
             }
         }
-        if(posTag == null){
-            posTag = tags.get(0).value();
-        }
-        if(posTag.hasCategory(LexicalCategory.Noun)){
-            addAbout(token); //add the token also as noun
-        }
-        if(posTag.hasCategory(LexicalCategory.Verb)){
-            setVerb(token);
-        }
+        this.posTag = posTag;
     }
-    
-    public void negate(Token token){
+    /**
+     * Adds a Token that negates this Sentiment
+     * @param token the token
+     */
+    protected void addNegate(Token token){
         if(negated == null){ //most of the time a singeltonList will do
             negated = Collections.singletonList(token);
         } else if(negated.size() == 1){
@@ -93,12 +150,12 @@ public class Sentiment {
         }
         checkSpan(token);
     }
-    protected final void setVerb(Token verb) {
+    protected void setVerb(Token verb) {
         this.verb = verb;
         checkSpan(verb);
     }
 
-    protected final void addAbout(Token noun) {
+    protected void addAbout(Token noun) {
         if(aboutness == null){
             aboutness = new ArrayList<Token>(4);
         }
@@ -107,8 +164,9 @@ public class Sentiment {
     }
     /**
      * Checks the {@link #start} {@link #end} values against the span selected
-     * by the parsed token
-     * @param token
+     * by the parsed token.<p>
+     * This method is called by all others that do add tokens.
+     * @param token the added token
      */
     private void checkSpan(Token token) {
         if(start > token.getStart()){
@@ -126,30 +184,44 @@ public class Sentiment {
     public PosTag getPosTag() {
         return posTag;
     }
+    /**
+     * The Sentiment value (considering possible negations)
+     * @return the sentiment value
+     */
     public double getValue() {
         return negated == null ? value : value*-1;
     }
-
+    /**
+     * The Token holding the sentiment
+     * @return the token
+     */
     public Token getToken() {
         return token;
     }
     public Sentence getSentence() {
         return sentence;
     }
+    /**
+     * The {@link AnalysedText Text}
+     * @return the text
+     */
     public AnalysedText getAnalysedText(){
         return token.getContext();
     }
-    
+    /**
+     * The tokens negating this Sentiment
+     * @return the tokens or an empty list if none
+     */
     public List<Token> getNegates() {
-        return negated == null ? Collections.EMPTY_LIST : negated;
+        return negated == null ? Collections.<Token>emptyList() : negated;
     }
 
     /**
-     * The Nouns or Pronoun(s) the Adjectives are about
-     * @return
+     * The Nouns or Pronoun(s) the Sentiment is about
+     * @return the tokens or an empty list if none.
      */
     public List<Token> getAboutness() {
-        return aboutness == null ? Collections.EMPTY_LIST : aboutness;
+        return aboutness == null ? Collections.<Token>emptyList() : aboutness;
     }
     /**
      * The verb used to assign Adjectives to the Nouns (or Pronouns)
@@ -158,11 +230,19 @@ public class Sentiment {
     public Token getVerb() {
         return verb;
     }
-    
+    /**
+     * The start position of this sentiment. This is the lowest start of any
+     * token linked to this sentiment
+     * @return the start position
+     */
     public int getStart(){
         return start;
     }
-    
+    /**
+     * The end position of this sentiment. This is the highest end of any
+     * token linked to this sentiment
+     * @return the end position
+     */
     public int getEnd(){
         return end;
     }

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
 package org.apache.stanbol.enhancer.engines.sentiment.summarize;
 
 import java.util.ArrayList;
@@ -9,7 +25,8 @@ import org.apache.stanbol.enhancer.nlp.m
 import org.apache.stanbol.enhancer.nlp.model.Token;
 
 /**
- * Used to collect {@link Sentiment}s that refer the same 
+ * Represents phrases in a sentence that do hold a Sentiment value.
+ * Phrases are defined by collecting {@link Sentiment}s that refer to the same 
  * {@link Sentiment#getAboutness()}
  * @author Rupert Westenthaler
  */
@@ -32,7 +49,10 @@ public class SentimentPhrase {
     public SentimentPhrase(Sentiment sentiment) {
         addSentiment(sentiment);
     }
-
+    /**
+     * Adds a Sentiment to the Phrase
+     * @param sentiment the sentiment to add
+     */
     public void addSentiment(Sentiment sentiment){
         sentiments.add(sentiment);
         nouns.addAll(sentiment.getAboutness());
@@ -73,9 +93,13 @@ public class SentimentPhrase {
         }
         return __sentiment[2];
     }
-    
+    /**
+     * The Sentence containing this phrase or <code>null</code> if no
+     * {@link Sentiment} was yet added
+     * @return the sentence
+     */
     public Sentence getSentence(){
-        return sentiments.get(0).getSentence();
+        return sentiments.isEmpty() ? null : sentiments.get(0).getSentence();
     }
     
     private void summarizeSentimentValues(){

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java Fri Oct 18 17:58:24 2013
@@ -16,8 +16,6 @@
  */
 package org.apache.stanbol.enhancer.engines.sentiment.summarize;
 
-import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.PHRASE_ANNOTATION;
-import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
 import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION;
 import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
@@ -35,11 +33,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NavigableMap;
-import java.util.SortedMap;
 import java.util.TreeMap;
 
-import javax.swing.DebugGraphics;
-
 import org.apache.clerezza.rdf.core.Language;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.rdf.core.MGraph;
@@ -61,7 +56,6 @@ import org.apache.stanbol.enhancer.nlp.m
 import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
 import org.apache.stanbol.enhancer.nlp.model.Token;
 import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
-import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
@@ -142,6 +136,10 @@ public class SentimentSummarizationEngin
      * The dc:type value used for fise:TextAnnotations indicating a Sentiment
      */
     public static final UriRef SENTIMENT_TYPE = new UriRef(NamespaceEnum.fise+"Sentiment");
+    /**
+     * The dc:type value used for the sentiment annotation of the whole document
+     */
+    public static final UriRef DOCUMENT_SENTIMENT_TYPE = new UriRef(NamespaceEnum.fise+"DocumentSentiment");
 
 
     private static final int DEFAULT_NEGATION_CONTEXT = 2;
@@ -194,12 +192,12 @@ public class SentimentSummarizationEngin
                 Boolean.parseBoolean(value.toString());
         //should we write sentiment values for sentences
         value = ctx.getProperties().get(PROPERTY_SENTENCE_SENTIMENT_STATE);
-        this.writeDocumentSentiment = value == null ? DEFAULT_SENTENCE_SENTIMENT_STATE :
+        this.writeSentencesSentimet = value == null ? DEFAULT_SENTENCE_SENTIMENT_STATE :
             value instanceof Boolean ? ((Boolean)value).booleanValue() : 
                 Boolean.parseBoolean(value.toString());
         //should we write sentiment values for phrases
         value = ctx.getProperties().get(PROPERTY_PHRASE_SENTIMENT_STATE);
-        this.writeDocumentSentiment = value == null ? DEFAULT_PHRASE_SENTIMENT_STATE :
+        this.writeSentimentPhrases = value == null ? DEFAULT_PHRASE_SENTIMENT_STATE :
             value instanceof Boolean ? ((Boolean)value).booleanValue() : 
                 Boolean.parseBoolean(value.toString());
     }
@@ -375,7 +373,7 @@ public class SentimentSummarizationEngin
                 //for negation use the negation context
                 Integer[] context = getNegationContext(index, conjunctions, searchSpan);
                 for(Token negationToken : negations.subMap(context[0] , true, context[1], true).values()){
-                    sentiment.negate(negationToken);
+                    sentiment.addNegate(negationToken);
                 }
                 //for nouns use the sentiment context
                 context = getSentimentContext(index, sentiment, verbs, conjunctions, nounsAndPronouns, searchSpan);
@@ -416,9 +414,9 @@ public class SentimentSummarizationEngin
         Integer[] context;
         PosTag pos = sentiment.getPosTag();
         boolean isPredicative;
-        if(pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
+        if(pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
             isPredicative = true;
-        } else if(pos.hasCategory(LexicalCategory.Adjective) && 
+        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective) && 
                 //Adjective that are not directly in front of a Noun
                 nouns.get(Integer.valueOf(index+1)) == null){ 
           isPredicative = true;
@@ -492,14 +490,14 @@ public class SentimentSummarizationEngin
                 context = new Integer[]{Integer.valueOf(index-nounContext),
                         Integer.valueOf(index+nounContext)};
             }
-        } else if(pos.hasCategory(LexicalCategory.Adjective)){
+        } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective)){
             //for all other adjective the affected noun is expected directly
             //after the noun
             context = new Integer[]{index,Integer.valueOf(index+1)};
-        } else if(pos.hasCategory(LexicalCategory.Noun)){
+        } else if(pos != null && pos.hasCategory(LexicalCategory.Noun)){
             //a noun with an sentiment
             context = new Integer[]{index,index};
-        } else { //else return default
+        } else { //else (includes pos == null) return default
             context = new Integer[]{Integer.valueOf(index-nounContext),
                     Integer.valueOf(index+nounContext)};
         }
@@ -515,17 +513,17 @@ public class SentimentSummarizationEngin
 
     private boolean isPronoun(Token token, String language) {
         Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
-        return posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
+        return posAnnotation == null ? false : posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
     }
 
     private boolean isVerb(Token token, String language) {
         Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
-        return posAnnotation.value().hasCategory(LexicalCategory.Verb);
+        return posAnnotation == null ? false : posAnnotation.value().hasCategory(LexicalCategory.Verb);
     }
     
     private boolean isCoordinatingConjuction(Token token, String language) {
         Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
-        return posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
+        return posAnnotation == null ? false : posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
     }
 
     private boolean isSectionBorder(Token token, String language) {
@@ -714,6 +712,10 @@ public class SentimentSummarizationEngin
         if(ssoType != null){
             metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
         }
+        if(section.getType() == SpanTypeEnum.Text){
+            metadata.add(new TripleImpl(enh, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
+        }
+        
     }
     /**
      * The maximum size of the preix/suffix for the selection context

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml Fri Oct 18 17:58:24 2013
@@ -67,10 +67,12 @@
               org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.11,1.1)",
               org.apache.stanbol.enhancer.servicesapi.impl; provide:=true; version="[0.11,1.1)",
               org.apache.stanbol.enhancer.engines.sentiment.api; provide:=true,
+              org.apache.stanbol.enhancer.engines.sentiment.util; provide:=true,
               *
             </Import-Package>
             <Export-Package>
-              org.apache.stanbol.enhancer.engines.sentiment.api;version=${project.version}
+              org.apache.stanbol.enhancer.engines.sentiment.api;version=${project.version},
+              org.apache.stanbol.enhancer.engines.sentiment.util; version=${project.version}
             </Export-Package>
             <Private-Package>
               org.apache.stanbol.enhancer.engines.sentiment.classifiers;version=${project.version},

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java Fri Oct 18 17:58:24 2013
@@ -16,6 +16,8 @@
 
 package org.apache.stanbol.enhancer.engines.sentiment.api;
 
+import java.util.Set;
+
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 
@@ -31,16 +33,11 @@ import org.apache.stanbol.enhancer.nlp.p
  */
 public abstract class LexicalCategoryClassifier implements SentimentClassifier {
 
-    public abstract double classifyWord(String word);
-
-    @Override
-    public boolean isAdjective(PosTag posTag) {
-        return posTag.hasCategory(LexicalCategory.Adjective);
-    }
+    public abstract double classifyWord(LexicalCategory cat, String word);
 
     @Override
-    public boolean isNoun(PosTag posTag) {
-        return posTag.hasCategory(LexicalCategory.Noun);
+    public Set<LexicalCategory> getCategories(PosTag posTag) {
+        return posTag.getCategories();
     }
 
 }

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java Fri Oct 18 17:58:24 2013
@@ -16,6 +16,9 @@
 
 package org.apache.stanbol.enhancer.engines.sentiment.api;
 
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 import org.osgi.framework.BundleContext;
 
@@ -27,7 +30,10 @@ import org.osgi.framework.BundleContext;
  * can be used with this engine. Implementations need to be 
  * {@link BundleContext#registerService(String, Object, java.util.Dictionary)
  * registered as OSGI service}.
+ * @see LexicalCategoryClassifier
+ * 
  * @author Sebastian Schaffert
+ * @author Rupert Westenthaler
  */
 public interface SentimentClassifier {
 
@@ -35,29 +41,26 @@ public interface SentimentClassifier {
      * Given the word passed as argument, return a value between -1 and 1 indicating its sentiment value from
      * very negative to very positive. Unknown words should return the value 0.
      *
-     * @param word
+     * @param cat the lexical category of the word (see 
+     * <a href="https://issues.apache.org/jira/browse/STANBOL-1151">STANBOL-1151</a>)
+     * @param word the word
      * @return
      */
-    public double classifyWord(String word);
-
+    public double classifyWord(LexicalCategory cat, String word);
 
-    /**
-     * Helper method. Return true if the given POS tag indicates an adjective in the language implemented by
-     * this classifier.
-     *
-     * @param posTag
-     * @return
-     */
-    public boolean isAdjective(PosTag posTag);
 
     /**
-     * Helper method. Return true if the given POS tag indicates a noun in the language implemented by this
-     * classifier.
-     *
-     * @param posTag
-     * @return
+     * Getter for the LexicalCategories of the passed {@link PosTag}. Used
+     * to look up the lexical categories for the 
+     * {@link #classifyWord(LexicalCategory, String)} lookups.<p>
+     * Simple implementations might return {@link PosTag#getCategories()}. But
+     * as some {@link PosTag} instances might only define the literal
+     * {@link PosTag#getTag()} value this method might also implement its own
+     * mappings.
+     * @param posTag the posTag
+     * @return the categories 
      */
-    public boolean isNoun(PosTag posTag);
+    public Set<LexicalCategory> getCategories(PosTag posTag);
     
     /**
      * The language of this WordClassifier

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java Fri Oct 18 17:58:24 2013
@@ -20,7 +20,9 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.util.Collections;
 import java.util.Dictionary;
+import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
@@ -39,17 +41,19 @@ import org.apache.stanbol.commons.stanbo
 import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileTracker;
 import org.apache.stanbol.enhancer.engines.sentiment.api.LexicalCategoryClassifier;
 import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier;
+import org.apache.stanbol.enhancer.engines.sentiment.util.WordSentimentDictionary;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.osgi.framework.BundleContext;
 import org.osgi.framework.ServiceRegistration;
 import org.osgi.service.component.ComponentContext;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 /**
  * A German word classifier based on SentiWS. Reads the SentiWS positive and negative word lists and parses them
  * into an appropriate hash table, so lookups should be extremely fast.
  * <p/>
  * @author Sebastian Schaffert
+ * @author Rupert Westenthaler
  */
 @Component(immediate=true)
 public class SentiWSComponent {
@@ -177,15 +181,13 @@ public class SentiWSComponent {
      */
     public static class SentiWsClassifierDE extends LexicalCategoryClassifier implements SentimentClassifier {
     
-        private ReadWriteLock lock = new ReentrantReadWriteLock();
-        private Map<String,Double> wordMap = new TreeMap<String,Double>();
+        private WordSentimentDictionary dict = new WordSentimentDictionary(Locale.GERMAN);
 
         protected SentiWsClassifierDE(){}
         
         protected void parseSentiWS(InputStream is) throws IOException {
             log.debug("parsing SentiWS word lists ...");
             BufferedReader in = new BufferedReader(new InputStreamReader(is));
-            lock.writeLock().lock();
             try {
                 for(String line = in.readLine(); line != null; line = in.readLine()) {
                     // input file will have a space- or tab-separated list per line:
@@ -195,37 +197,37 @@ public class SentiWSComponent {
                     String[] components = line.split("\\s");
 
                     // parse the weight
-                    Double weight = Double.parseDouble(components[1]);
+                    Double weight = Double.valueOf(components[1]);
 
                     // get the main word
-                    String[] mainWord = components[0].split("\\|");
-                    wordMap.put(mainWord[0],weight);
+                    String[] wordPart = components[0].split("\\|");
+                    String mainWord = wordPart[0];
+                    LexicalCategory cat = getLexicalCategory(wordPart[1]);
+                    dict.updateSentiment(cat, mainWord, weight);
 
                     // get the remaining words (deflections)
                     if(components.length > 2) {
                         for(String word : components[2].split(",")) {
-                            String lcWord = word.toLowerCase(Locale.GERMAN);
-                            Double current = wordMap.put(lcWord,weight);
-                            if(current != null){
-                                log.warn("Multiple sentiments [{},{}] for word {}",
-                                    new Object[]{current,weight,lcWord});
-                            }
+                            dict.updateSentiment(cat, word, weight);
                         }
                     }
                 }
             } finally {
-                lock.writeLock().unlock();
                 IOUtils.closeQuietly(in);
             }
         }
     
-    
-        public int getWordCount() {
-            lock.readLock().lock();
-            try {
-                return wordMap.size();
-            } finally {
-                lock.readLock().unlock();
+        private LexicalCategory getLexicalCategory(String posTag){
+            char c = posTag.charAt(0);
+            switch (c) {
+                case 'N':
+                    return LexicalCategory.Noun;
+                case 'V':
+                    return LexicalCategory.Verb;
+                case 'A':
+                    return LexicalCategory.Adjective;
+                default: //TODO: change this to a warning and return NULL
+                    throw new IllegalStateException("Unsupported posTag '"+posTag+"'!");
             }
         }
         
@@ -242,26 +244,16 @@ public class SentiWSComponent {
          * @return
          */
         @Override
-        public double classifyWord(String word) {
-            lock.readLock().lock();
-            try {
-                Double sentiment = wordMap.get(word.toLowerCase(Locale.GERMAN));
-                return sentiment != null ? sentiment.doubleValue() : 0.0;
-            } finally {
-                lock.readLock().unlock();  
-            }
+        public double classifyWord(LexicalCategory cat, String word) {
+            Double sentiment = dict.getSentiment(cat, word);
+            return sentiment != null ? sentiment.doubleValue() : 0.0;
         }
         /**
          * Internally used to free up resources when the service is
          * unregistered
          */
         protected void close(){
-            lock.writeLock().lock();
-            try {
-                wordMap.clear();
-            } finally {
-                lock.writeLock().unlock();
-            }
+            dict.clear();
         }
     }
 

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java Fri Oct 18 17:58:24 2013
@@ -39,6 +39,8 @@ import org.apache.stanbol.commons.stanbo
 import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileTracker;
 import org.apache.stanbol.enhancer.engines.sentiment.api.LexicalCategoryClassifier;
 import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier;
+import org.apache.stanbol.enhancer.engines.sentiment.util.WordSentimentDictionary;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.osgi.framework.BundleContext;
 import org.osgi.framework.ServiceRegistration;
 import org.osgi.service.component.ComponentContext;
@@ -55,6 +57,7 @@ import org.slf4j.LoggerFactory;
  * settings.
  * <p/>
  * @author Sebastian Schaffert
+ * @author Rupert Westenthaler
  */
 @Component(immediate = true)
 public class SentiWordNet {
@@ -164,16 +167,14 @@ public class SentiWordNet {
      */
     public static class SentiWordNetClassifierEN extends LexicalCategoryClassifier implements SentimentClassifier {
 
-        private ReadWriteLock lock = new ReentrantReadWriteLock();
-        private Map<String,Double> wordMap = new TreeMap<String,Double>();
-
+        WordSentimentDictionary dict = new WordSentimentDictionary(Locale.ENGLISH);
+        
         private org.apache.lucene.analysis.en.EnglishMinimalStemmer stemmer = new EnglishMinimalStemmer();
 
         protected SentiWordNetClassifierEN() {}
 
         protected void parseSentiWordNet(InputStream is) throws IOException {
             BufferedReader in = new BufferedReader(new InputStreamReader(is));
-            lock.writeLock().lock();
             try {
                 // read line by line:
                 // - lines starting with # are ignored
@@ -184,6 +185,7 @@ public class SentiWordNet {
                         String[] components = line.split("\t");
     
                         try {
+                            LexicalCategory cat = parseLexCat(components[0]);
                             double posScore = Double.parseDouble(components[2]);
                             double negScore = Double.parseDouble(components[3]);
                             String synonyms = components[4];
@@ -196,34 +198,36 @@ public class SentiWordNet {
                                     // part
                                     String[] synonym = synonymToken.split("#");
                                     String stemmed = getStemmed(synonym[0]);
-                                    Double existing = wordMap.put(stemmed.toLowerCase(Locale.ENGLISH), score);
-                                    if(existing != null){
-                                        log.warn("Multiple Sentiment Scores [{},{}] for word {}",
-                                            new Object[]{existing, score, stemmed.toLowerCase(Locale.ENGLISH)});
-                                    }
+                                    dict.updateSentiment(cat, stemmed, score);
                                 }
                             }
     
-                        } catch (Exception ex) {
+                        } catch (RuntimeException ex) {
                             log.warn("could not parse SentiWordNet line '{}': {}", line, ex.getMessage());
                         }
                     }
                 }
             } finally {
-                lock.writeLock().unlock();
                 IOUtils.closeQuietly(in);
             }
         }
 
-        public int getWordCount() {
-            lock.readLock().lock();
-            try {
-                return wordMap.size();
-            } finally {
-                lock.readLock().unlock();
+        private LexicalCategory parseLexCat(String val) {
+            switch (val.charAt(0)) {
+                case 'a':
+                    return LexicalCategory.Adjective;
+                case 'v':
+                    return LexicalCategory.Verb;
+                case 'n':
+                    return LexicalCategory.Noun;
+                case 'r':
+                    return LexicalCategory.Adverb;
+                default:
+                    throw new IllegalStateException("Uncown POS tag '"+val+"'!");
             }
         }
 
+
         /**
          * Given the word passed as argument, return a value between -1 and 1 indicating its sentiment value
          * from very negative to very positive. Unknown words should return the value 0.
@@ -232,15 +236,9 @@ public class SentiWordNet {
          * @return
          */
         @Override
-        public double classifyWord(String word) {
-            String stemmed = getStemmed(word);
-            lock.readLock().lock();
-            try {
-                Double sentiment = wordMap.get(stemmed.toLowerCase(Locale.ENGLISH));
-                return sentiment != null ? sentiment.doubleValue() : 0.0;
-            } finally {
-                lock.readLock().unlock();
-            }
+        public double classifyWord(LexicalCategory cat, String word) {
+            Double sentiment = dict.getSentiment(cat, getStemmed(word));
+            return sentiment != null ? sentiment.doubleValue() : 0.0;
         }
 
         private String getStemmed(String word) {
@@ -253,12 +251,7 @@ public class SentiWordNet {
         }
         
         protected void close(){
-            lock.writeLock().lock();
-            try {
-                wordMap.clear();
-            } finally {
-                lock.writeLock().unlock();
-            }
+            dict.clear();
         }
     }
 }

Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java Fri Oct 18 17:58:24 2013
@@ -25,6 +25,7 @@ import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
@@ -264,14 +265,26 @@ public class SentimentEngine  extends Ab
         Iterator<Token> tokens = analysedText.getTokens();
         while(tokens.hasNext()){
             Token token = tokens.next();
-            boolean process = !adjectivesOnly;
-            if(!process){ //check POS types
+            Set<LexicalCategory> cats = null;
+            boolean process = false;
+            if(!adjectivesOnly){
+                process = true;
+                Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
+                if(posTag != null && posTag.probability() == Value.UNKNOWN_PROBABILITY
+                        || posTag.probability() >= (minPOSConfidence/2.0)){
+                    cats = classifier.getCategories(posTag.value());
+                } else { //no POS tags or probability too low
+                    cats = Collections.emptySet();
+                }
+            } else { //check PosTags if we need to lookup this word
                 Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
                 boolean ignore = false;
                 while(!ignore && !process && posTags.hasNext()) {
                     Value<PosTag> value = posTags.next();
                     PosTag tag = value.value();
-                    boolean state = classifier.isAdjective(tag) || classifier.isNoun(tag);
+                    cats = classifier.getCategories(tag);
+                    boolean state = cats.contains(LexicalCategory.Adjective) 
+                            || cats.contains(LexicalCategory.Noun);
                     ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY ||
                             value.probability() >= minPOSConfidence);
                     process = state && (value.probability() == Value.UNKNOWN_PROBABILITY ||
@@ -279,11 +292,28 @@ public class SentimentEngine  extends Ab
                 }
             } //else process all tokens ... no POS tag checking needed
             if(process){
-                double sentiment = classifier.classifyWord(token.getSpan());
+                String word = token.getSpan();
+                double sentiment = 0.0;
+                if(cats.isEmpty()){
+                    sentiment = classifier.classifyWord(null, word);
+                } else { //in case of multiple Lexical Cats
+                    //we build the average over non-zero sentiments for the word
+                    int catSentNum = 0;
+                    for(LexicalCategory cat : cats){
+                        double catSent = classifier.classifyWord(cat, word);
+                        if(catSent != 0.0){
+                            catSentNum++;
+                            sentiment = sentiment + catSent;
+                        }
+                    }
+                    if(catSentNum > 0){
+                        sentiment = sentiment / (double) catSentNum;
+                    }
+                }
                 if(sentiment != 0.0){
                     token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
                 } //else do not set sentiments with 0.0
-            }
+            } // else do not process
         }
 //        } finally {
 //            ci.getLock().writeLock().unlock();

Modified: stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -4,6 +4,7 @@
 # - Apache Software License
 # - Apache Software License, Version 2.0
 # - BSD License
+# - BSD-style
 # - Common Development And Distribution License (CDDL), Version 1.0
 # - Common Development And Distribution License (CDDL), Version 1.1
 # - Common Public License, Version 1.0
@@ -13,11 +14,23 @@
 # - GNU Lesser General Public License (LGPL), Version 2.1
 # - ICU License
 # - MIT License
+# - New BSD License
+# - New BSD license
 # - Public Domain License
 #-------------------------------------------------------------------------------
 # Please fill the missing licenses for dependencies :
 #
 #
-#Thu Feb 07 13:59:58 CET 2013
+#Tue Sep 24 21:08:50 CEST 2013
+antlr--antlr--2.7.2=Public Domain
+commons-beanutils--commons-beanutils--1.7.0=Apache Software License, Version 2.0
+dom4j--dom4j--1.1=BSD-style
+jakarta-regexp--jakarta-regexp--1.4=Apache Software License, Version 2.0
+javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
+javax.servlet.jsp--jsp-api--2.1=Common Development And Distribution License (CDDL), Version 1.0
+org.apache.zookeeper--zookeeper--3.4.5=The Apache Software License, Version 2.0
 org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
 org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
+org.restlet.jee--org.restlet--2.1.1=The Apache Software License, Version 2.0
+org.restlet.jee--org.restlet.ext.servlet--2.1.1=The Apache Software License, Version 2.0
+oro--oro--2.0.8=The Apache Software License, Version 2.0

Modified: stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml Fri Oct 18 17:58:24 2013
@@ -1,4 +1,20 @@
 <?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
 <project
 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
 	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">