You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by re...@apache.org on 2013/10/18 19:58:28 UTC
svn commit: r1533571 [3/7] - in /stanbol/branches/commons-ng: ./
commons/solr/ commons/solr/core/ commons/solr/core/src/license/
commons/solr/core/src/main/java/org/apache/stanbol/commons/solr/
commons/solr/core/src/main/java/org/apache/stanbol/commons...
Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/main/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/NamedEntity.java Fri Oct 18 17:58:24 2013
@@ -22,6 +22,7 @@ import static org.apache.stanbol.enhance
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.slf4j.Logger;
@@ -79,26 +80,33 @@ public final class NamedEntity {
* text annotation is missing required information.
*/
public static NamedEntity createFromTextAnnotation(TripleCollection graph, NonLiteral textAnnotation){
- String name = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
- if (name == null) {
+ String selected = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
+ if (selected == null) {
log.debug("Unable to create NamedEntity for TextAnnotation {} "
+ "because property {} is not present",textAnnotation,ENHANCER_SELECTED_TEXT);
return null;
}
- name = name.trim();
+ String name = selected.trim();
if(name.isEmpty()){
log.debug("Unable to process TextAnnotation {} because its selects "
+ "an empty Stirng !",textAnnotation);
return null;
}
+ // remove punctuation from the search string
+ name = cleanupKeywords(name);
+ if(name.isEmpty()){
+ log.debug("Unable to process TextAnnotation {} because its selects "
+ + "an stirng with punktations only (selected: {})!",
+ textAnnotation, selected);
+ return null;
+ }
UriRef type = EnhancementEngineHelper.getReference(graph, textAnnotation, DC_TYPE);
if (type == null) {
log.warn("Unable to process TextAnnotation {} because property {}"
+ " is not present!",textAnnotation, DC_TYPE);
return null;
}
- // remove punctuation form the search string
- return new NamedEntity(textAnnotation,cleanupKeywords(name),type);
+ return new NamedEntity(textAnnotation,name,type);
}
/**
* Removes punctuation form a parsed string
Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/MockEntityhub.java Fri Oct 18 17:58:24 2013
@@ -54,11 +54,12 @@ class MockEntityhub implements Entityhub
private static final Logger log = LoggerFactory.getLogger(MockEntityhub.class);
+ public static final String TEST_SOLR_CORE_CONFIGURATION = "dbpedia_26k.solrindex.bz2";
protected SolrYard yard;
protected MockEntityhub(){
SolrYardConfig config = new SolrYardConfig("dbpedia", "dbpedia");
- config.setIndexConfigurationName("dbpedia_43k");//use dbpedia default data for initialisation
+ config.setIndexConfigurationName(TEST_SOLR_CORE_CONFIGURATION);
config.setAllowInitialisation(true);
IndexReference solrIndexRef = IndexReference.parse(config.getSolrServerLocation());
SolrServer server = StandaloneEmbeddedSolrServerProvider.getInstance().getSolrServer(
Modified: stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/entitytagging/src/test/java/org/apache/stanbol/enhancer/engines/entitytagging/impl/TestEntityLinkingEnhancementEngine.java Fri Oct 18 17:58:24 2013
@@ -63,6 +63,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
+import org.apache.stanbol.entityhub.servicesapi.Entityhub;
import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.junit.After;
import org.junit.AfterClass;
@@ -78,30 +79,31 @@ public class TestEntityLinkingEnhancemen
private static final Logger log = LoggerFactory.getLogger(TestEntityLinkingEnhancementEngine.class);
+ public static final String CONTEXT = "In March 2009, Condoleezza Rice returned "
+ +"to Stanford University near Palo Alto.";
+
+ //The old text replaced by STANBOL-1163
+// public static final String CONTEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
+// + " geologist who lived in New Zealand and worked at the University of Otago.";
/**
- * The context for the tests (same as in TestOpenNLPEnhancementEngine)
- */
- public static final String CONTEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
- + " geologist who lived in New Zealand and worked at the University of Otago.";
- /**
- * The person for the tests (same as in TestOpenNLPEnhancementEngine)
+ * The person for the tests
*/
- public static final String PERSON = "Patrick Marshall";
+ public static final String PERSON = ", Condoleezza Rice";
/**
* The organisation for the tests (same as in TestOpenNLPEnhancementEngine)
*/
- public static final String ORGANISATION ="University of Otago";
+ public static final String ORGANISATION ="Stanford University";
/**
* The place for the tests (same as in TestOpenNLPEnhancementEngine)
*/
- public static final String PLACE = "New Zealand";
+ public static final String PLACE = "Palo Alto";
private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
- static NamedEntityTaggingEngine entityLinkingEngine;
-
private static String userDir = System.getProperty("user.dir");
+ private static Entityhub entityhub;
+
@BeforeClass
public static void setUpServices() throws IOException {
//TODO: set user.dir to /target/test-files
@@ -114,36 +116,47 @@ public class TestEntityLinkingEnhancemen
String testRootDir = testFiles.getCanonicalPath();
log.info("Test 'user.dir' folder {}",testRootDir);
System.getProperties().setProperty("user.dir", testRootDir);
- entityLinkingEngine = new NamedEntityTaggingEngine();
+ entityhub = new MockEntityhub();
+ }
+
+ @AfterClass
+ public static void shutdownServices() {
+ System.getProperties().setProperty("user.dir", userDir);
+ }
+
+ protected NamedEntityTaggingEngine initEngine(boolean person, boolean organisation, boolean place){
+ NamedEntityTaggingEngine entityLinkingEngine = new NamedEntityTaggingEngine();
//instead of calling activate we directly set the required fields
//we need a data source for linking
- entityLinkingEngine.entityhub = new MockEntityhub();
- entityLinkingEngine.personState = true;
+ entityLinkingEngine.entityhub = entityhub;
+ entityLinkingEngine.personState = person;
entityLinkingEngine.personType = OntologicalClasses.DBPEDIA_PERSON.getUnicodeString();
- entityLinkingEngine.orgState = true;
+ entityLinkingEngine.orgState = organisation;
entityLinkingEngine.orgType = OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString();
- entityLinkingEngine.placeState = true;
+ entityLinkingEngine.placeState = place;
entityLinkingEngine.placeType = OntologicalClasses.DBPEDIA_PLACE.getUnicodeString();
entityLinkingEngine.nameField = Properties.RDFS_LABEL.getUnicodeString();
//not implemented
entityLinkingEngine.dereferenceEntities = false;
+ return entityLinkingEngine;
}
-
- @Before
- public void bindServices() throws IOException {
- }
-
- @After
- public void unbindServices() {
- }
-
- @AfterClass
- public static void shutdownServices() {
- System.getProperties().setProperty("user.dir", userDir);
- }
-
- public static ContentItem getContentItem(final String id, final String text) throws IOException {
- return ciFactory.createContentItem(new UriRef(id),new StringSource(text));
+ /**
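+ * Creates and initialises a new content item using {@link #CONTEXT} as
+ * content and adds the person, organisation and place text annotations plus the language.
+ * @return the initialised content item
+ * @throws IOException if the content item can not be created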
+ */
+ private ContentItem initContentItem() throws IOException {
+ ContentItem ci = ciFactory.createContentItem(
+ new UriRef("urn:iks-project:enhancer:text:content-item:person"),
+ new StringSource(CONTEXT));
+ //add three text annotations to be consumed by this test
+ getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
+ getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
+ getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
+ //add the language
+ ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
+ return ci;
}
public static void getTextAnnotation(ContentItem ci, String name,String context,UriRef type){
@@ -174,20 +187,48 @@ public class TestEntityLinkingEnhancemen
@Test
public void testEntityLinkingEnhancementEngine() throws Exception{
//create a content item
- ContentItem ci = getContentItem("urn:iks-project:enhancer:text:content-item:person", CONTEXT);
- //add three text annotations to be consumed by this test
- getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
- getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
- getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
- //add the language
- ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
+ ContentItem ci = initContentItem();
+ NamedEntityTaggingEngine entityLinkingEngine = initEngine(true, true, true);
//perform the computation of the enhancements
entityLinkingEngine.computeEnhancements(ci);
- int entityAnnotationCount = validateAllEntityAnnotations(ci);
- assertEquals(4, entityAnnotationCount);
+ int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+ assertEquals(3, entityAnnotationCount);
+ }
+
+ @Test
+ public void testPersonLinking() throws Exception{
+ //create a content item
+ ContentItem ci = initContentItem();
+ NamedEntityTaggingEngine entityLinkingEngine = initEngine(true, false, false);
+ //perform the computation of the enhancements
+ entityLinkingEngine.computeEnhancements(ci);
+ int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+ assertEquals(1, entityAnnotationCount);
+ }
+
+ @Test
+ public void testOrganizationLinking() throws Exception{
+ //create a content item
+ ContentItem ci = initContentItem();
+ NamedEntityTaggingEngine entityLinkingEngine = initEngine(false, true, false);
+ //perform the computation of the enhancements
+ entityLinkingEngine.computeEnhancements(ci);
+ int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+ assertEquals(1, entityAnnotationCount);
}
- private static int validateAllEntityAnnotations(ContentItem ci){
+ @Test
+ public void testLocationLinking() throws Exception{
+ //create a content item
+ ContentItem ci = initContentItem();
+ NamedEntityTaggingEngine entityLinkingEngine = initEngine(false, false, true);
+ //perform the computation of the enhancements
+ entityLinkingEngine.computeEnhancements(ci);
+ int entityAnnotationCount = validateAllEntityAnnotations(entityLinkingEngine, ci);
+ assertEquals(1, entityAnnotationCount);
+ }
+
+ private static int validateAllEntityAnnotations(NamedEntityTaggingEngine entityLinkingEngine, ContentItem ci){
Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
expectedValues.put(DC_CREATOR,LiteralFactory.getInstance().createTypedLiteral(
Modified: stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/keywordextraction/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -13,13 +14,13 @@
# - GNU Lesser General Public License (LGPL), Version 2.1
# - ICU License
# - MIT License
+# - New BSD License
# - Public Domain License
#-------------------------------------------------------------------------------
# Please fill the missing licenses for dependencies :
#
#
-#Sun Oct 07 18:21:31 CEST 2012
+#Tue Jul 23 16:38:34 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/pom.xml Fri Oct 18 17:58:24 2013
@@ -65,6 +65,9 @@
<excludes>
<!-- AL20 License -->
<exclude>src/license/THIRD-PARTY.properties</exclude>
+
+ <!-- Config -->
+ <exclude>src/main/resources/nostoptags.txt</exclude>
</excludes>
</configuration>
</plugin>
Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/Constants.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.kuromoji;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/KuromojiNlpEngine.java Fri Oct 18 17:58:24 2013
@@ -115,7 +115,7 @@ import org.slf4j.LoggerFactory;
})
public class KuromojiNlpEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements ServiceProperties {
- private static final Version LUCENE_VERSION = Version.LUCENE_41;
+ private static final Version LUCENE_VERSION = Version.LUCENE_44;
private static final String TOKENIZER_MODE = "search"; //normal, extended
private static final Map<String,Object> SERVICE_PROPERTIES;
private static final Map<String,String> TOKENIZER_FACTORY_CONFIG = new HashMap<String,String>();
@@ -361,25 +361,17 @@ public class KuromojiNlpEngine extends A
//and third the parentResourceLoader (if present).
resourceLoader = new StanbolResourceLoader(KuromojiNlpEngine.class.getClassLoader(),
new StanbolResourceLoader(parentResourceLoader));
- tokenizerFactory = new JapaneseTokenizerFactory();
- tokenizerFactory.init(TOKENIZER_FACTORY_CONFIG);
- tokenizerFactory.setLuceneMatchVersion(LUCENE_VERSION);
+ tokenizerFactory = new JapaneseTokenizerFactory(TOKENIZER_FACTORY_CONFIG);
((ResourceLoaderAware) tokenizerFactory).inform(resourceLoader);
//base form filter
- TokenFilterFactory baseFormFilterFactory = new JapaneseBaseFormFilterFactory();
- baseFormFilterFactory.init(BASE_FORM_FILTER_CONFIG);
- baseFormFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+ TokenFilterFactory baseFormFilterFactory = new JapaneseBaseFormFilterFactory(BASE_FORM_FILTER_CONFIG);
filterFactories.add(baseFormFilterFactory);
//POS filter
- TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory();
- posFilterFactory.init(POS_FILTER_CONFIG);
- posFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+ TokenFilterFactory posFilterFactory = new JapanesePartOfSpeechStopFilterFactory(POS_FILTER_CONFIG);
((ResourceLoaderAware) posFilterFactory).inform(resourceLoader);
filterFactories.add(posFilterFactory);
//Stemming
- TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory();
- stemmFilterFactory.init(STEMM_FILTER_CONFIG);
- stemmFilterFactory.setLuceneMatchVersion(LUCENE_VERSION);
+ TokenFilterFactory stemmFilterFactory = new JapaneseKatakanaStemFilterFactory(STEMM_FILTER_CONFIG);
filterFactories.add(stemmFilterFactory);
}
Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/main/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/NerData.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.kuromoji.impl;
import org.apache.stanbol.enhancer.nlp.ner.NerTag;
Modified: stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/kuromoji-nlp/src/test/java/org/apache/stanbol/enhancer/engines/kuromoji/impl/TestKuromojiNlpEngine.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.kuromoji.impl;
import java.io.IOException;
Propchange: stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Fri Oct 18 17:58:24 2013
@@ -0,0 +1,7 @@
+target
+
+.project
+
+.settings
+
+.classpath
Modified: stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml?rev=1533571&r1=1533530&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/lucenefstlinking/pom.xml Fri Oct 18 17:58:24 2013
@@ -22,13 +22,13 @@
<parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>apache-stanbol-enhancement-engines</artifactId>
- <version>0.10.1-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.lucenefstlinking</artifactId>
- <version>0.10.1-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancement Engine : Lucene FST Entity Linking</name>
@@ -62,7 +62,7 @@
<instructions>
<Import-Package>
!org.mitre.solr.tagger.*,
- org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,0.12)",
+ org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.10,1.1)",
org.apache.stanbol.enhancer.engines.entitylinking;version=${project.version}; provide:=true,
*
</Import-Package>
@@ -93,30 +93,30 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.stanboltools.datafileprovider</artifactId>
- <version>0.11.0</version>
+ <version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.solr.core</artifactId>
- <version>0.12.0-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.namespaceprefix.service</artifactId>
- <version>0.11.0</version>
+ <version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.11.0-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.entitylinking.engine</artifactId>
- <version>0.10.1-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
@@ -142,14 +142,14 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.core</artifactId>
- <version>0.11.0-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<!-- the SolrYard with the dbpedia default dataset is used for testing -->
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.entityhub.yard.solr</artifactId>
- <version>0.12.0-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -161,13 +161,13 @@
<dependency><!-- dbpedia default data do use ICU Tokenizer -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.solr.extras.icu</artifactId>
- <version>0.12.0-SNAPSHOT</version>
+ <version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency> <!-- required to read the test data (merkel_nlp.json) -->
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp.json</artifactId>
- <version>0.10.0</version>
+ <version>1.0.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
</dependencies>
Modified: stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/metaxa/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -28,9 +28,9 @@
#
#Thu Feb 07 13:45:25 CET 2013
com.drewnoakes--metadata-extractor--2.4.0-beta1.bundle=The Apache Software License, Version 2.0
-com.sun.xml.bind--jaxb-impl--2.1.9.bundle=CDDL v1.1
+com.sun.xml.bind--jaxb-impl--2.1.9.bundle=Common Development And Distribution License (CDDL), Version 1.1
dom4j--dom4j--1.6.1=BSD style license
-javax.xml.bind--jaxb-api--2.1.9.v200905050702_orbit=CDDL v1.1
+javax.xml.bind--jaxb-api--2.1.9.v200905050702_orbit=Common Development And Distribution License (CDDL), Version 1.1
mp3agic--mp3agic--0.6=MIT License
net.fortuna.ical4j--ical4j-vcard--0.9.3.ant20100406=iCal4j - License
net.sourceforge--htmlcleaner--2_1p=BSD License
Modified: stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/nlp2rdf/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 14:07:48 CET 2013
+#Tue Jul 23 16:38:27 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/
------------------------------------------------------------------------------
Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker:r1496360-1533530
Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-chunker/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 13:55:20 CET 2013
+#Tue Jul 23 16:38:25 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/
------------------------------------------------------------------------------
Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-ner:r1496360-1533530
Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-ner/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -13,13 +14,13 @@
# - GNU Lesser General Public License (LGPL), Version 2.1
# - ICU License
# - MIT License
+# - New BSD License
# - Public Domain License
#-------------------------------------------------------------------------------
# Please fill the missing licenses for dependencies :
#
#
-#Sun Oct 07 16:31:16 CEST 2012
+#Tue Jul 23 16:38:25 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/
------------------------------------------------------------------------------
Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-pos:r1496360-1533530
Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-pos/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 13:46:06 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/
------------------------------------------------------------------------------
Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-sentence:r1496360-1533530
Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-sentence/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 13:46:00 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Propchange: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/
------------------------------------------------------------------------------
Merged /stanbol/trunk/enhancement-engines/opennlp/opennlp-token:r1496360-1533530
Modified: stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/opennlp/opennlp-token/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -3,6 +3,7 @@
# Already used licenses in project :
# - Apache Software License
# - Apache Software License, Version 2.0
+# - BSD 3-Clause License
# - BSD License
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
@@ -18,8 +19,7 @@
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 13:46:03 CET 2013
+#Tue Jul 23 16:38:24 CEST 2013
javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
-jwnl--jwnl--1.3.3=BSD License
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
Modified: stanbol/branches/commons-ng/enhancement-engines/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/pom.xml Fri Oct 18 17:58:24 2013
@@ -77,10 +77,14 @@
<module>entitylinking</module>
<module>entityhublinking</module>
<module>entitytagging</module>
+ <!-- fast EntityLinking using Lucene FST -->
+ <module>lucenefstlinking</module> <!-- see STANBOL-1128 -->
<!-- deprecated -->
<module>keywordextraction</module>
+
<!-- Categorization -->
+ <module>topic/api</module>
<module>topic/engine</module>
<module>topic/web</module>
@@ -106,7 +110,6 @@
<module>geonames</module> <!-- http://geonames.org -->
<module>opencalais</module> <!-- http://opencalais.com/ -->
<module>zemanta</module> <!-- htt://zemanta.com -->
-
</modules>
<build>
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/Sentiment.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.sentiment.summarize;
import java.util.ArrayList;
@@ -8,7 +24,6 @@ import java.util.Set;
import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
-import org.apache.stanbol.enhancer.nlp.model.Section;
import org.apache.stanbol.enhancer.nlp.model.Sentence;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
@@ -16,11 +31,15 @@ import org.apache.stanbol.enhancer.nlp.p
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
/**
- * This class is used to allow adding negations to sentiments even if the
- * sentiment was already assigned to an SentimentInfo. In addition this class
- * stores the token for the sentiment AND the tokens causing the negations. No
- * support for multiple negations - meaning that the sentiment value is inverted
- * if 1..* negations are present.
+ * This class is used to represent a {@link Token} that holds a Sentiment in the
+ * context of a {@link Sentence}. Sentiment might be {@link #addNegate(Token) negated}
+ * and be {@link #addAbout(Token) assigned} to a Noun or Pronoun via a
+ * {@link #getVerb() Verb}. The {@link #getStart()} and {@link #getEnd()} values
+ * return the span selected by this Sentiment. These are the lowest start and
+ * highest end values of any token related with this sentiment. Those spans are
+ * used by the {@link SentimentPhrase} class for clustering {@link Sentiment}s
+ * to phrases.
+ *
* @author Rupert Westenthaler
*
*/
@@ -37,16 +56,47 @@ public class Sentiment {
* {@link #PREF_LEX_CAT}.
*/
private static final Set<LexicalCategory> PREF_LEX_CAT = EnumSet.of(LexicalCategory.Adjective);
-
+ /**
+ * The token holding the sentiment
+ */
private final Token token;
+ /**
+ * The (not negated) value of the sentiment
+ */
private final double value;
+ /**
+ * The Sentence of the {@link #token}
+ */
private final Sentence sentence;
+ /**
+ * List of tokens that negate this sentiment. <code>null</code> if no
+ * negation was added
+ */
private List<Token> negated;
+ /**
+ * The Nouns and/or Pronouns this sentiment is about. <code>null</code> if
+ * no aboutness is defined
+ */
private List<Token> aboutness;
- private PosTag posTag;
+ /**
+ * The PosTag of the {@link #token}
+ */
+ private final PosTag posTag;
+ /**
+ * The start position of this sentiment. This is the lowest start of any
+ * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
+ */
private int start;
+ /**
+ * The end position of this sentiment. This is the highest end of any
+ * token added to this sentiment. This field is set by {@link #checkSpan(Token)}
+ */
private int end;
+ /**
+ * The verb assigning this sentiment to the Nouns and/or Pronouns added
+ * by {@link #addAbout(Token)}.
+ */
private Token verb;
/**
@@ -63,26 +113,33 @@ public class Sentiment {
this.start = token.getStart();
this.end = token.getEnd();
List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION);
- for(Value<PosTag> tag : tags){
- if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
- tag.probability() >= MIN_POS_CONF ||
- !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
- posTag = tag.value();
- break;
+ PosTag posTag = null;
+ if(tags != null && !tags.isEmpty()){
+ for(Value<PosTag> tag : tags){
+ if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
+ tag.probability() >= MIN_POS_CONF ||
+ !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
+ posTag = tag.value();
+ break;
+ }
+ }
+ if(posTag == null){
+ posTag = tags.get(0).value();
+ }
+ if(posTag.hasCategory(LexicalCategory.Noun)){
+ addAbout(token); //add the token also as noun
+ }
+ if(posTag.hasCategory(LexicalCategory.Verb)){
+ setVerb(token);
}
}
- if(posTag == null){
- posTag = tags.get(0).value();
- }
- if(posTag.hasCategory(LexicalCategory.Noun)){
- addAbout(token); //add the token also as noun
- }
- if(posTag.hasCategory(LexicalCategory.Verb)){
- setVerb(token);
- }
+ this.posTag = posTag;
}
-
- public void negate(Token token){
+ /**
+ * Adds a Token that negates this Sentiment
+ * @param token the token
+ */
+ protected void addNegate(Token token){
if(negated == null){ //most of the time a singeltonList will do
negated = Collections.singletonList(token);
} else if(negated.size() == 1){
@@ -93,12 +150,12 @@ public class Sentiment {
}
checkSpan(token);
}
- protected final void setVerb(Token verb) {
+ protected void setVerb(Token verb) {
this.verb = verb;
checkSpan(verb);
}
- protected final void addAbout(Token noun) {
+ protected void addAbout(Token noun) {
if(aboutness == null){
aboutness = new ArrayList<Token>(4);
}
@@ -107,8 +164,9 @@ public class Sentiment {
}
/**
* Checks the {@link #start} {@link #end} values against the span selected
- * by the parsed token
- * @param token
+ * by the parsed token.<p>
+ * This method is called by all others that do add tokens.
+ * @param token the added token
*/
private void checkSpan(Token token) {
if(start > token.getStart()){
@@ -126,30 +184,44 @@ public class Sentiment {
public PosTag getPosTag() {
return posTag;
}
+ /**
+ * The Sentiment value (considering possible negations)
+ * @return the sentiment value
+ */
public double getValue() {
return negated == null ? value : value*-1;
}
-
+ /**
+ * The Token holding the sentiment
+ * @return the token
+ */
public Token getToken() {
return token;
}
public Sentence getSentence() {
return sentence;
}
+ /**
+ * The {@link AnalysedText Text}
+ * @return the text
+ */
public AnalysedText getAnalysedText(){
return token.getContext();
}
-
+ /**
+ * The tokens negating this Sentiment
+ * @return the tokens or an empty list if none
+ */
public List<Token> getNegates() {
- return negated == null ? Collections.EMPTY_LIST : negated;
+ return negated == null ? Collections.<Token>emptyList() : negated;
}
/**
- * The Nouns or Pronoun(s) the Adjectives are about
- * @return
+ * The Nouns or Pronoun(s) the Sentiment is about
+ * @return the tokens or an empty list if none.
*/
public List<Token> getAboutness() {
- return aboutness == null ? Collections.EMPTY_LIST : aboutness;
+ return aboutness == null ? Collections.<Token>emptyList() : aboutness;
}
/**
* The verb used to assign Adjectives to the Nouns (or Pronouns)
@@ -158,11 +230,19 @@ public class Sentiment {
public Token getVerb() {
return verb;
}
-
+ /**
+ * The start position of this sentiment. This is the lowest start of any
+ * token linked to this sentiment
+ * @return the start position
+ */
public int getStart(){
return start;
}
-
+ /**
+ * The end position of this sentiment. This is the highest end of any
+ * token linked to this sentiment
+ * @return the end position
+ */
public int getEnd(){
return end;
}
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentPhrase.java Fri Oct 18 17:58:24 2013
@@ -1,3 +1,19 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.stanbol.enhancer.engines.sentiment.summarize;
import java.util.ArrayList;
@@ -9,7 +25,8 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.model.Token;
/**
- * Used to collect {@link Sentiment}s that refer the same
+ * Represents phrases in a sentence that do hold a Sentiment value.
+ * Phrases are defined by collecting {@link Sentiment}s that refer the same
* {@link Sentiment#getAboutness()}
* @author Rupert Westenthaler
*/
@@ -32,7 +49,10 @@ public class SentimentPhrase {
public SentimentPhrase(Sentiment sentiment) {
addSentiment(sentiment);
}
-
+ /**
+ * Adds a Sentiment to the Phrase
+ * @param sentiment the sentiment to add
+ */
public void addSentiment(Sentiment sentiment){
sentiments.add(sentiment);
nouns.addAll(sentiment.getAboutness());
@@ -73,9 +93,13 @@ public class SentimentPhrase {
}
return __sentiment[2];
}
-
+ /**
+ * The Sentence containing this phrase or <code>null</code> if no
+ * {@link Sentiment} was yet added
+ * @return the sentence
+ */
public Sentence getSentence(){
- return sentiments.get(0).getSentence();
+ return sentiments.isEmpty() ? null : sentiments.get(0).getSentence();
}
private void summarizeSentimentValues(){
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-summarization/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/summarize/SentimentSummarizationEngine.java Fri Oct 18 17:58:24 2013
@@ -16,8 +16,6 @@
*/
package org.apache.stanbol.enhancer.engines.sentiment.summarize;
-import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.PHRASE_ANNOTATION;
-import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.createTextEnhancement;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
@@ -35,11 +33,8 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
-import java.util.SortedMap;
import java.util.TreeMap;
-import javax.swing.DebugGraphics;
-
import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
@@ -61,7 +56,6 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
-import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.Pos;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
@@ -142,6 +136,10 @@ public class SentimentSummarizationEngin
* The dc:type value used for fise:TextAnnotations indicating a Sentiment
*/
public static final UriRef SENTIMENT_TYPE = new UriRef(NamespaceEnum.fise+"Sentiment");
+ /**
+ * The dc:type value used for the sentiment annotation of the whole document
+ */
+ public static final UriRef DOCUMENT_SENTIMENT_TYPE = new UriRef(NamespaceEnum.fise+"DocumentSentiment");
private static final int DEFAULT_NEGATION_CONTEXT = 2;
@@ -194,12 +192,12 @@ public class SentimentSummarizationEngin
Boolean.parseBoolean(value.toString());
//should we write sentiment values for sentences
value = ctx.getProperties().get(PROPERTY_SENTENCE_SENTIMENT_STATE);
- this.writeDocumentSentiment = value == null ? DEFAULT_SENTENCE_SENTIMENT_STATE :
+ this.writeSentencesSentimet = value == null ? DEFAULT_SENTENCE_SENTIMENT_STATE :
value instanceof Boolean ? ((Boolean)value).booleanValue() :
Boolean.parseBoolean(value.toString());
//should we write sentiment values for phrases
value = ctx.getProperties().get(PROPERTY_PHRASE_SENTIMENT_STATE);
- this.writeDocumentSentiment = value == null ? DEFAULT_PHRASE_SENTIMENT_STATE :
+ this.writeSentimentPhrases = value == null ? DEFAULT_PHRASE_SENTIMENT_STATE :
value instanceof Boolean ? ((Boolean)value).booleanValue() :
Boolean.parseBoolean(value.toString());
}
@@ -375,7 +373,7 @@ public class SentimentSummarizationEngin
//for negation use the negation context
Integer[] context = getNegationContext(index, conjunctions, searchSpan);
for(Token negationToken : negations.subMap(context[0] , true, context[1], true).values()){
- sentiment.negate(negationToken);
+ sentiment.addNegate(negationToken);
}
//for nouns use the sentiment context
context = getSentimentContext(index, sentiment, verbs, conjunctions, nounsAndPronouns, searchSpan);
@@ -416,9 +414,9 @@ public class SentimentSummarizationEngin
Integer[] context;
PosTag pos = sentiment.getPosTag();
boolean isPredicative;
- if(pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
+ if(pos != null && pos.getPosHierarchy().contains(Pos.PredicativeAdjective)){
isPredicative = true;
- } else if(pos.hasCategory(LexicalCategory.Adjective) &&
+ } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective) &&
//Adjective that are not directly in front of a Noun
nouns.get(Integer.valueOf(index+1)) == null){
isPredicative = true;
@@ -492,14 +490,14 @@ public class SentimentSummarizationEngin
context = new Integer[]{Integer.valueOf(index-nounContext),
Integer.valueOf(index+nounContext)};
}
- } else if(pos.hasCategory(LexicalCategory.Adjective)){
+ } else if(pos != null && pos.hasCategory(LexicalCategory.Adjective)){
//for all other adjective the affected noun is expected directly
//after the noun
context = new Integer[]{index,Integer.valueOf(index+1)};
- } else if(pos.hasCategory(LexicalCategory.Noun)){
+ } else if(pos != null && pos.hasCategory(LexicalCategory.Noun)){
//a noun with an sentiment
context = new Integer[]{index,index};
- } else { //else return default
+ } else { //else (includes pos == null) return default
context = new Integer[]{Integer.valueOf(index-nounContext),
Integer.valueOf(index+nounContext)};
}
@@ -515,17 +513,17 @@ public class SentimentSummarizationEngin
private boolean isPronoun(Token token, String language) {
Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
- return posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
+ return posAnnotation == null ? false : posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
}
private boolean isVerb(Token token, String language) {
Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
- return posAnnotation.value().hasCategory(LexicalCategory.Verb);
+ return posAnnotation == null ? false : posAnnotation.value().hasCategory(LexicalCategory.Verb);
}
private boolean isCoordinatingConjuction(Token token, String language) {
Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
- return posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
+ return posAnnotation == null ? false : posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
}
private boolean isSectionBorder(Token token, String language) {
@@ -714,6 +712,10 @@ public class SentimentSummarizationEngin
if(ssoType != null){
metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
}
+ if(section.getType() == SpanTypeEnum.Text){
+ metadata.add(new TripleImpl(enh, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
+ }
+
}
/**
* The maximum size of the preix/suffix for the selection context
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/pom.xml Fri Oct 18 17:58:24 2013
@@ -67,10 +67,12 @@
org.apache.stanbol.enhancer.servicesapi; provide:=true; version="[0.11,1.1)",
org.apache.stanbol.enhancer.servicesapi.impl; provide:=true; version="[0.11,1.1)",
org.apache.stanbol.enhancer.engines.sentiment.api; provide:=true,
+ org.apache.stanbol.enhancer.engines.sentiment.util; provide:=true,
*
</Import-Package>
<Export-Package>
- org.apache.stanbol.enhancer.engines.sentiment.api;version=${project.version}
+ org.apache.stanbol.enhancer.engines.sentiment.api;version=${project.version},
+ org.apache.stanbol.enhancer.engines.sentiment.util; version=${project.version}
</Export-Package>
<Private-Package>
org.apache.stanbol.enhancer.engines.sentiment.classifiers;version=${project.version},
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/LexicalCategoryClassifier.java Fri Oct 18 17:58:24 2013
@@ -16,6 +16,8 @@
package org.apache.stanbol.enhancer.engines.sentiment.api;
+import java.util.Set;
+
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
@@ -31,16 +33,11 @@ import org.apache.stanbol.enhancer.nlp.p
*/
public abstract class LexicalCategoryClassifier implements SentimentClassifier {
- public abstract double classifyWord(String word);
-
- @Override
- public boolean isAdjective(PosTag posTag) {
- return posTag.hasCategory(LexicalCategory.Adjective);
- }
+ public abstract double classifyWord(LexicalCategory cat, String word);
@Override
- public boolean isNoun(PosTag posTag) {
- return posTag.hasCategory(LexicalCategory.Noun);
+ public Set<LexicalCategory> getCategories(PosTag posTag) {
+ return posTag.getCategories();
}
}
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/api/SentimentClassifier.java Fri Oct 18 17:58:24 2013
@@ -16,6 +16,9 @@
package org.apache.stanbol.enhancer.engines.sentiment.api;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
import org.osgi.framework.BundleContext;
@@ -27,7 +30,10 @@ import org.osgi.framework.BundleContext;
* can be used with this engine. Implementations need to be
* {@link BundleContext#registerService(String, Object, java.util.Dictionary)
* registered as OSGI service}.
+ * @see LexicalCategoryClassifier
+ *
* @author Sebastian Schaffert
+ * @author Rupert Westenthaler
*/
public interface SentimentClassifier {
@@ -35,29 +41,26 @@ public interface SentimentClassifier {
* Given the word passed as argument, return a value between -1 and 1 indicating its sentiment value from
* very negative to very positive. Unknown words should return the value 0.
*
- * @param word
+ * @param cat the lexical category of the word (see
+ * <a href="https://issues.apache.org/jira/browse/STANBOL-1151">STANBOL-1151</a>)
+ * @param word the word
* @return
*/
- public double classifyWord(String word);
-
+ public double classifyWord(LexicalCategory cat, String word);
- /**
- * Helper method. Return true if the given POS tag indicates an adjective in the language implemented by
- * this classifier.
- *
- * @param posTag
- * @return
- */
- public boolean isAdjective(PosTag posTag);
/**
- * Helper method. Return true if the given POS tag indicates a noun in the language implemented by this
- * classifier.
- *
- * @param posTag
- * @return
+ * Getter for the LexicalCategories for the parsed {@link PosTag}. Used
+ * to lookup the lexical categories for the
+ * {@link #classifyWord(LexicalCategory, String)} lookups.<p>
+ * Simple implementations might return {@link PosTag#getCategories()}. But
+ * as some {@link PosTag} instances might only define the literal
+ * {@link PosTag#getTag()} value this method might also implement its own
+ * mappings.
+ * @param posTag the posTag
+ * @return the categories
*/
- public boolean isNoun(PosTag posTag);
+ public Set<LexicalCategory> getCategories(PosTag posTag);
/**
* The language of this WordClassifier
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWSComponent.java Fri Oct 18 17:58:24 2013
@@ -20,7 +20,9 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.util.Collections;
import java.util.Dictionary;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
@@ -39,17 +41,19 @@ import org.apache.stanbol.commons.stanbo
import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileTracker;
import org.apache.stanbol.enhancer.engines.sentiment.api.LexicalCategoryClassifier;
import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier;
+import org.apache.stanbol.enhancer.engines.sentiment.util.WordSentimentDictionary;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.osgi.framework.BundleContext;
import org.osgi.framework.ServiceRegistration;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
/**
* A German word classifier based on SentiWS. Reads the SentiWS positive and negative word lists and parses them
* into an appropriate hash table, so lookups should be extremely fast.
* <p/>
* @author Sebastian Schaffert
+ * @author Rupert Westenthaler
*/
@Component(immediate=true)
public class SentiWSComponent {
@@ -177,15 +181,13 @@ public class SentiWSComponent {
*/
public static class SentiWsClassifierDE extends LexicalCategoryClassifier implements SentimentClassifier {
- private ReadWriteLock lock = new ReentrantReadWriteLock();
- private Map<String,Double> wordMap = new TreeMap<String,Double>();
+ private WordSentimentDictionary dict = new WordSentimentDictionary(Locale.GERMAN);
protected SentiWsClassifierDE(){}
protected void parseSentiWS(InputStream is) throws IOException {
log.debug("parsing SentiWS word lists ...");
BufferedReader in = new BufferedReader(new InputStreamReader(is));
- lock.writeLock().lock();
try {
for(String line = in.readLine(); line != null; line = in.readLine()) {
// input file will have a space- or tab-separated list per line:
@@ -195,37 +197,37 @@ public class SentiWSComponent {
String[] components = line.split("\\s");
// parse the weight
- Double weight = Double.parseDouble(components[1]);
+ Double weight = Double.valueOf(components[1]);
// get the main word
- String[] mainWord = components[0].split("\\|");
- wordMap.put(mainWord[0],weight);
+ String[] wordPart = components[0].split("\\|");
+ String mainWord = wordPart[0];
+ LexicalCategory cat = getLexicalCategory(wordPart[1]);
+ dict.updateSentiment(cat, mainWord, weight);
// get the remaining words (deflections)
if(components.length > 2) {
for(String word : components[2].split(",")) {
- String lcWord = word.toLowerCase(Locale.GERMAN);
- Double current = wordMap.put(lcWord,weight);
- if(current != null){
- log.warn("Multiple sentiments [{},{}] for word {}",
- new Object[]{current,weight,lcWord});
- }
+ dict.updateSentiment(cat, word, weight);
}
}
}
} finally {
- lock.writeLock().unlock();
IOUtils.closeQuietly(in);
}
}
-
- public int getWordCount() {
- lock.readLock().lock();
- try {
- return wordMap.size();
- } finally {
- lock.readLock().unlock();
+ private LexicalCategory getLexicalCategory(String posTag){
+ char c = posTag.charAt(0);
+ switch (c) {
+ case 'N':
+ return LexicalCategory.Noun;
+ case 'V':
+ return LexicalCategory.Verb;
+ case 'A':
+ return LexicalCategory.Adjective;
+ default: //TODO: change this to a warning and return NULL
+ throw new IllegalStateException("Unsupported posTag '"+posTag+"'!");
}
}
@@ -242,26 +244,16 @@ public class SentiWSComponent {
* @return
*/
@Override
- public double classifyWord(String word) {
- lock.readLock().lock();
- try {
- Double sentiment = wordMap.get(word.toLowerCase(Locale.GERMAN));
- return sentiment != null ? sentiment.doubleValue() : 0.0;
- } finally {
- lock.readLock().unlock();
- }
+ public double classifyWord(LexicalCategory cat, String word) {
+ Double sentiment = dict.getSentiment(cat, word);
+ return sentiment != null ? sentiment.doubleValue() : 0.0;
}
/**
* Internally used to free up resources when the service is
* unregistered
*/
protected void close(){
- lock.writeLock().lock();
- try {
- wordMap.clear();
- } finally {
- lock.writeLock().unlock();
- }
+ dict.clear();
}
}
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/classifiers/SentiWordNet.java Fri Oct 18 17:58:24 2013
@@ -39,6 +39,8 @@ import org.apache.stanbol.commons.stanbo
import org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileTracker;
import org.apache.stanbol.enhancer.engines.sentiment.api.LexicalCategoryClassifier;
import org.apache.stanbol.enhancer.engines.sentiment.api.SentimentClassifier;
+import org.apache.stanbol.enhancer.engines.sentiment.util.WordSentimentDictionary;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.osgi.framework.BundleContext;
import org.osgi.framework.ServiceRegistration;
import org.osgi.service.component.ComponentContext;
@@ -55,6 +57,7 @@ import org.slf4j.LoggerFactory;
* settings.
* <p/>
* @author Sebastian Schaffert
+ * @author Rupert Westenthaler
*/
@Component(immediate = true)
public class SentiWordNet {
@@ -164,16 +167,14 @@ public class SentiWordNet {
*/
public static class SentiWordNetClassifierEN extends LexicalCategoryClassifier implements SentimentClassifier {
- private ReadWriteLock lock = new ReentrantReadWriteLock();
- private Map<String,Double> wordMap = new TreeMap<String,Double>();
-
+ WordSentimentDictionary dict = new WordSentimentDictionary(Locale.ENGLISH);
+
private org.apache.lucene.analysis.en.EnglishMinimalStemmer stemmer = new EnglishMinimalStemmer();
protected SentiWordNetClassifierEN() {}
protected void parseSentiWordNet(InputStream is) throws IOException {
BufferedReader in = new BufferedReader(new InputStreamReader(is));
- lock.writeLock().lock();
try {
// read line by line:
// - lines starting with # are ignored
@@ -184,6 +185,7 @@ public class SentiWordNet {
String[] components = line.split("\t");
try {
+ LexicalCategory cat = parseLexCat(components[0]);
double posScore = Double.parseDouble(components[2]);
double negScore = Double.parseDouble(components[3]);
String synonyms = components[4];
@@ -196,34 +198,36 @@ public class SentiWordNet {
// part
String[] synonym = synonymToken.split("#");
String stemmed = getStemmed(synonym[0]);
- Double existing = wordMap.put(stemmed.toLowerCase(Locale.ENGLISH), score);
- if(existing != null){
- log.warn("Multiple Sentiment Scores [{},{}] for word {}",
- new Object[]{existing, score, stemmed.toLowerCase(Locale.ENGLISH)});
- }
+ dict.updateSentiment(cat, stemmed, score);
}
}
- } catch (Exception ex) {
+ } catch (RuntimeException ex) {
log.warn("could not parse SentiWordNet line '{}': {}", line, ex.getMessage());
}
}
}
} finally {
- lock.writeLock().unlock();
IOUtils.closeQuietly(in);
}
}
- public int getWordCount() {
- lock.readLock().lock();
- try {
- return wordMap.size();
- } finally {
- lock.readLock().unlock();
+ private LexicalCategory parseLexCat(String val) {
+ switch (val.charAt(0)) {
+ case 'a':
+ return LexicalCategory.Adjective;
+ case 'v':
+ return LexicalCategory.Verb;
+ case 'n':
+ return LexicalCategory.Noun;
+ case 'r':
+ return LexicalCategory.Adverb;
+ default:
+ throw new IllegalStateException("Uncown POS tag '"+val+"'!");
}
}
+
/**
* Given the word passed as argument, return a value between -1 and 1 indicating its sentiment value
* from very negative to very positive. Unknown words should return the value 0.
@@ -232,15 +236,9 @@ public class SentiWordNet {
* @return
*/
@Override
- public double classifyWord(String word) {
- String stemmed = getStemmed(word);
- lock.readLock().lock();
- try {
- Double sentiment = wordMap.get(stemmed.toLowerCase(Locale.ENGLISH));
- return sentiment != null ? sentiment.doubleValue() : 0.0;
- } finally {
- lock.readLock().unlock();
- }
+ public double classifyWord(LexicalCategory cat, String word) {
+ Double sentiment = dict.getSentiment(cat, getStemmed(word));
+ return sentiment != null ? sentiment.doubleValue() : 0.0;
}
private String getStemmed(String word) {
@@ -253,12 +251,7 @@ public class SentiWordNet {
}
protected void close(){
- lock.writeLock().lock();
- try {
- wordMap.clear();
- } finally {
- lock.writeLock().unlock();
- }
+ dict.clear();
}
}
}
Modified: stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java (original)
+++ stanbol/branches/commons-ng/enhancement-engines/sentiment-wordclassifier/src/main/java/org/apache/stanbol/enhancer/engines/sentiment/services/SentimentEngine.java Fri Oct 18 17:58:24 2013
@@ -25,6 +25,7 @@ import java.util.Dictionary;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -264,14 +265,26 @@ public class SentimentEngine extends Ab
Iterator<Token> tokens = analysedText.getTokens();
while(tokens.hasNext()){
Token token = tokens.next();
- boolean process = !adjectivesOnly;
- if(!process){ //check POS types
+ Set<LexicalCategory> cats = null;
+ boolean process = false;
+ if(!adjectivesOnly){
+ process = true;
+ Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
+ if(posTag != null && posTag.probability() == Value.UNKNOWN_PROBABILITY
+ || posTag.probability() >= (minPOSConfidence/2.0)){
+ cats = classifier.getCategories(posTag.value());
+ } else { //no POS tags or probability too low
+ cats = Collections.emptySet();
+ }
+ } else { //check PosTags if we need to lookup this word
Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator();
boolean ignore = false;
while(!ignore && !process && posTags.hasNext()) {
Value<PosTag> value = posTags.next();
PosTag tag = value.value();
- boolean state = classifier.isAdjective(tag) || classifier.isNoun(tag);
+ cats = classifier.getCategories(tag);
+ boolean state = cats.contains(LexicalCategory.Adjective)
+ || cats.contains(LexicalCategory.Noun);
ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY ||
value.probability() >= minPOSConfidence);
process = state && (value.probability() == Value.UNKNOWN_PROBABILITY ||
@@ -279,11 +292,28 @@ public class SentimentEngine extends Ab
}
} //else process all tokens ... no POS tag checking needed
if(process){
- double sentiment = classifier.classifyWord(token.getSpan());
+ String word = token.getSpan();
+ double sentiment = 0.0;
+ if(cats.isEmpty()){
+ sentiment = classifier.classifyWord(null, word);
+ } else { //in case of multiple Lexical Cats
+ //we build the average over the non-zero sentiments for the word
+ int catSentNum = 0;
+ for(LexicalCategory cat : cats){
+ double catSent = classifier.classifyWord(cat, word);
+ if(catSent != 0.0){
+ catSentNum++;
+ sentiment = sentiment + catSent;
+ }
+ }
+ if(catSentNum > 0){
+ sentiment = sentiment / (double) catSentNum;
+ }
+ }
if(sentiment != 0.0){
token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
} //else do not set sentiments with 0.0
- }
+ } // else do not process
}
// } finally {
// ci.getLock().writeLock().unlock();
Modified: stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties (original)
+++ stanbol/branches/commons-ng/enhancement-engines/smartcn-token/src/license/THIRD-PARTY.properties Fri Oct 18 17:58:24 2013
@@ -4,6 +4,7 @@
# - Apache Software License
# - Apache Software License, Version 2.0
# - BSD License
+# - BSD-style
# - Common Development And Distribution License (CDDL), Version 1.0
# - Common Development And Distribution License (CDDL), Version 1.1
# - Common Public License, Version 1.0
@@ -13,11 +14,23 @@
# - GNU Lesser General Public License (LGPL), Version 2.1
# - ICU License
# - MIT License
+# - New BSD License
+# - New BSD license
# - Public Domain License
#-------------------------------------------------------------------------------
# Please fill the missing licenses for dependencies :
#
#
-#Thu Feb 07 13:59:58 CET 2013
+#Tue Sep 24 21:08:50 CEST 2013
+antlr--antlr--2.7.2=Public Domain
+commons-beanutils--commons-beanutils--1.7.0=Apache Software License, Version 2.0
+dom4j--dom4j--1.1=BSD-style
+jakarta-regexp--jakarta-regexp--1.4=Apache Software License, Version 2.0
+javax.servlet--servlet-api--2.5=Common Development And Distribution License (CDDL), Version 1.0
+javax.servlet.jsp--jsp-api--2.1=Common Development And Distribution License (CDDL), Version 1.0
+org.apache.zookeeper--zookeeper--3.4.5=The Apache Software License, Version 2.0
org.osgi--org.osgi.compendium--4.1.0=The Apache Software License, Version 2.0
org.osgi--org.osgi.core--4.1.0=The Apache Software License, Version 2.0
+org.restlet.jee--org.restlet--2.1.1=The Apache Software License, Version 2.0
+org.restlet.jee--org.restlet.ext.servlet--2.1.1=The Apache Software License, Version 2.0
+oro--oro--2.0.8=The Apache Software License, Version 2.0
Modified: stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml?rev=1533571&r1=1533570&r2=1533571&view=diff
==============================================================================
--- stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml (original)
+++ stanbol/branches/commons-ng/enhancement-engines/textannotationnewmodel/pom.xml Fri Oct 18 17:58:24 2013
@@ -1,4 +1,20 @@
<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
<project
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">