You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/03/20 08:59:49 UTC

svn commit: r1458676 - in /stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main: java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/ resources/config/

Author: rwesten
Date: Wed Mar 20 07:59:49 2013
New Revision: 1458676

URL: http://svn.apache.org/r1458676
Log:
STANBOL-990: SentiWordNet (en) and SentiWSComponent (de) now use lowercase for matching. Both the dictionary entries and the parsed words are converted to lowercase using the Locale of the corresponding dictionary.

Minor: removed an unused configuration file - left over from the original contribution.

Removed:
    stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/resources/config/
Modified:
    stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
    stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java

Modified: stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java?rev=1458676&r1=1458675&r2=1458676&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java (original)
+++ stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java Wed Mar 20 07:59:49 2013
@@ -24,6 +24,7 @@ import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Hashtable;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
@@ -203,7 +204,12 @@ public class SentiWSComponent {
                     // get the remaining words (deflections)
                     if(components.length > 2) {
                         for(String word : components[2].split(",")) {
-                            wordMap.put(word,weight);
+                            String lcWord = word.toLowerCase(Locale.GERMAN);
+                            Double current = wordMap.put(lcWord,weight);
+                            if(current != null){
+                                log.warn("Multiple sentiments [{},{}] for word {}",
+                                    new Object[]{current,weight,lcWord});
+                            }
                         }
                     }
                 }
@@ -239,7 +245,7 @@ public class SentiWSComponent {
         public double classifyWord(String word) {
             lock.readLock().lock();
             try {
-                Double sentiment = wordMap.get(word);
+                Double sentiment = wordMap.get(word.toLowerCase(Locale.GERMAN));
                 return sentiment != null ? sentiment.doubleValue() : 0.0;
             } finally {
                 lock.readLock().unlock();  

Modified: stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java?rev=1458676&r1=1458675&r2=1458676&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java (original)
+++ stanbol/trunk/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java Wed Mar 20 07:59:49 2013
@@ -23,6 +23,7 @@ import java.io.InputStreamReader;
 import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.Hashtable;
+import java.util.Locale;
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.concurrent.locks.ReadWriteLock;
@@ -194,7 +195,12 @@ public class SentiWordNet {
                                     // synonymTokens are of the form word#position, so we strip out the position
                                     // part
                                     String[] synonym = synonymToken.split("#");
-                                    wordMap.put(getStemmed(synonym[0]), score);
+                                    String stemmed = getStemmed(synonym[0]);
+                                    Double existing = wordMap.put(stemmed.toLowerCase(Locale.ENGLISH), score);
+                                    if(existing != null){
+                                        log.warn("Multiple Sentiment Scores [{},{}] for word {}",
+                                            new Object[]{existing, score, stemmed.toLowerCase(Locale.ENGLISH)});
+                                    }
                                 }
                             }
     
@@ -230,7 +236,7 @@ public class SentiWordNet {
             String stemmed = getStemmed(word);
             lock.readLock().lock();
             try {
-                Double sentiment = wordMap.get(stemmed);
+                Double sentiment = wordMap.get(stemmed.toLowerCase(Locale.ENGLISH));
                 return sentiment != null ? sentiment.doubleValue() : 0.0;
             } finally {
                 lock.readLock().unlock();