You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/11/26 12:39:38 UTC
svn commit: r1413560 [3/3] - in /stanbol/trunk: data/ data/defaultconfig/
data/defaultconfig/src/main/resources/
data/defaultconfig/src/main/resources/config/ data/opennlp/lang/de/
data/sentiment/ data/sentiment/sentiwordnet/ data/sentiment/sentiwordne...
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NameOccurrence.java Mon Nov 26 11:39:25 2012
@@ -21,7 +21,9 @@ import org.apache.clerezza.rdf.core.UriR
public class NameOccurrence {
public final String name;
-
+
+ public final UriRef type;
+
public final Integer start;
public final Integer end;
@@ -30,14 +32,12 @@ public class NameOccurrence {
public final Double confidence;
- public final UriRef type;
-
public NameOccurrence(String name, Integer start, Integer end, UriRef type,
String context, Double confidence) {
- this.start = start;
- this.end = end;
this.name = name;
this.type = type;
+ this.start = start;
+ this.end = end;
this.context = context;
this.confidence = confidence;
}
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NamedEntityExtractionEnhancementEngine.java Mon Nov 26 11:39:25 2012
@@ -51,7 +51,7 @@ import org.osgi.service.component.Compon
description = "%stanbol.NamedEntityExtractionEnhancementEngine.description")
@Service
@org.apache.felix.scr.annotations.Properties(value={
- @Property(name=EnhancementEngine.PROPERTY_NAME,value="ner"),
+ @Property(name=EnhancementEngine.PROPERTY_NAME,value="opennlp-ner"),
@Property(name=NamedEntityExtractionEnhancementEngine.PROCESSED_LANGUAGES,value=""),
@Property(name=NamedEntityExtractionEnhancementEngine.DEFAULT_LANGUAGE,value=""),
//set the ranking of the default config to a negative value (ConfigurationPolicy.OPTIONAL)
Modified: stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java (original)
+++ stanbol/trunk/enhancer/engines/opennlp-ner/src/test/java/org/apache/stanbol/enhancer/engines/opennlp/impl/TestNamedEntityExtractionEnhancementEngine.java Mon Nov 26 11:39:25 2012
@@ -178,7 +178,7 @@ public class TestNamedEntityExtractionEn
expectedValues.put(Properties.DC_TYPE, new UriRef("http://www.bootstrep.eu/ontology/GRO#DNA"));
MGraph g = ci.getMetadata();
int textAnnotationCount = validateAllTextAnnotations(g,EHEALTH,expectedValues);
- assertEquals(6, textAnnotationCount);
+ assertEquals(7, textAnnotationCount);
}
Propchange: stanbol/trunk/enhancer/engines/opennlp-pos/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.project
+
+target
+
+.classpath
+
+.settings
Copied: stanbol/trunk/enhancer/engines/opennlp-pos/pom.xml (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/opennlp-pos/pom.xml?p2=stanbol/trunk/enhancer/engines/opennlp-pos/pom.xml&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/pom.xml&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/opennlp-pos/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/opennlp-pos/pom.xml Mon Nov 26 11:39:25 2012
@@ -16,13 +16,13 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.opennlp.pos</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine: POS Tagging</name>
@@ -32,12 +32,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/opennlp-pos/
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/opennlp-pos/
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/opennlp-pos/
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/opennlp-pos/
</developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
+ <url>http://stanbol.apache.org/</url>
</scm>
<build>
@@ -80,17 +80,17 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.opennlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.felix</groupId>
Propchange: stanbol/trunk/enhancer/engines/opennlp-sentence/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.settings
+
+.classpath
+
+target
+
+.project
Propchange: stanbol/trunk/enhancer/engines/opennlp-token/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.settings
+
+target
+
+.classpath
+
+.project
Modified: stanbol/trunk/enhancer/engines/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/pom.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/pom.xml Mon Nov 26 11:39:25 2012
@@ -42,30 +42,48 @@
</scm>
<modules>
- <module>opennlp-ner</module>
- <module>langdetect</module>
- <module>langid</module>
- <module>topic</module>
- <module>metaxa</module>
+ <!-- Content processing / Metadata extraction -->
+ <module>tika</module>
<module>htmlextractor</module>
<module>xmpextractor</module>
- <module>tika</module>
+ <module>metaxa</module>
+ <module>refactor</module>
+
+ <!-- NLP processing engines -->
+ <module>langdetect</module>
+ <module>langid</module>
+ <module>opennlp-sentence</module>
+ <module>opennlp-token</module>
+ <module>opennlp-pos</module>
+ <module>opennlp-ner</module>
+ <module>opennlp-chunker</module>
+ <module>nlp2rdf</module> <!-- converts AnalyzedText ContentPart to RDF -->
+ <!-- EntityLinking -->
+ <module>entitylinking</module>
+ <module>entityhublinking</module>
<module>entitytagging</module>
+ <!-- deprecated -->
<module>keywordextraction</module>
- <module>refactor</module>
+
+ <!-- Categorization -->
+ <module>topic</module>
+
+ <!-- Sentiment -->
+ <module>sentiment-wordclassifier</module>
+ <module>sentiment-summarization</module>
+
+ <!-- UIMA based engines -->
+ <module>uimaremote</module>
+ <module>uimatotriples</module>
+ <module>uimalocal-template</module>
<!-- Enhancement Engines using external services -->
<module>celi</module> <!-- http://linguagrid.org -->
+ <module>dbpedia-spotlight</module> <!-- dbpedia Spotlight -->
<module>geonames</module> <!-- http://geonames.org -->
<module>opencalais</module> <!-- http://opencalais.com/ -->
<module>zemanta</module> <!-- http://zemanta.com -->
- <!-- DBpedia.org Spotlight Enhancement Engines (STANBOL-706) -->
- <module>dbpedia-spotlight</module>
- <!-- UIMA based engines -->
- <module>uimaremote</module>
- <module>uimatotriples</module>
- <module>uimalocal-template</module>
</modules>
</project>
Propchange: stanbol/trunk/enhancer/engines/sentiment-summarization/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+target
+
+.classpath
+
+.settings
+
+.project
Propchange: stanbol/trunk/enhancer/engines/sentiment-wordclassifier/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+target
+
+.settings
+
+.classpath
+
+.project
Copied: stanbol/trunk/enhancer/engines/sentiment-wordclassifier/pom.xml (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/sentiment-wordclassifier/pom.xml?p2=stanbol/trunk/enhancer/engines/sentiment-wordclassifier/pom.xml&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/pom.xml&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/engines/sentiment-wordclassifier/pom.xml (original)
+++ stanbol/trunk/enhancer/engines/sentiment-wordclassifier/pom.xml Mon Nov 26 11:39:25 2012
@@ -16,13 +16,13 @@
<parent>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
<groupId>org.apache.stanbol</groupId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.engines.sentiment.wordclassifier</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer Enhancement Engine: Sentiment Word Tagging Engine</name>
@@ -38,12 +38,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/sentiment-wordtagging/
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/sentiment-wordtagging/
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/engines/sentiment-wordtagging/
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancer/engines/sentiment-wordtagging/
</developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
+ <url>http://stanbol.apache.org/</url>
</scm>
<properties>
@@ -93,17 +93,17 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.commons.solr.core</artifactId>
- <version>0.10.1-incubating-SNAPSHOT</version>
+ <version>0.10.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.felix</groupId>
Propchange: stanbol/trunk/enhancer/generic/nlp/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,7 @@
+.project
+
+.settings
+
+.classpath
+
+target
Copied: stanbol/trunk/enhancer/generic/nlp/pom.xml (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/pom.xml?p2=stanbol/trunk/enhancer/generic/nlp/pom.xml&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml (original)
+++ stanbol/trunk/enhancer/generic/nlp/pom.xml Mon Nov 26 11:39:25 2012
@@ -15,12 +15,12 @@
<parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.parent</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<relativePath>../../parent</relativePath>
</parent>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.nlp</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<packaging>bundle</packaging>
<name>Apache Stanbol Enhancer NLP</name>
@@ -31,12 +31,12 @@
<scm>
<connection>
- scm:svn:http://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/generic/nlp/
+ scm:svn:http://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/nlp/
</connection>
<developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/incubator/stanbol/trunk/enhancer/generic/nlp/
+ scm:svn:https://svn.apache.org/repos/asf/stanbol/trunk/enhancer/generic/nlp/
</developerConnection>
- <url>http://incubator.apache.org/stanbol/</url>
+ <url>http://stanbol.apache.org/</url>
</scm>
<build>
@@ -54,10 +54,13 @@
org.apache.stanbol.enhancer.nlp;version=${project.version},
org.apache.stanbol.enhancer.nlp.model;version=${project.version},
org.apache.stanbol.enhancer.nlp.model.annotation;version=${project.version},
- org.apache.stanbol.enhancer.nlp.ontology;version=${project.version},
+ org.apache.stanbol.enhancer.nlp.model.tag;version=${project.version},
+ org.apache.stanbol.enhancer.nlp.nif;version=${project.version},
+ org.apache.stanbol.enhancer.nlp.ner;version=${project.version},
org.apache.stanbol.enhancer.nlp.pos;version=${project.version},
org.apache.stanbol.enhancer.nlp.pos.*;version=${project.version},
org.apache.stanbol.enhancer.nlp.phrase;version=${project.version},
+ org.apache.stanbol.enhancer.nlp.morpho;version=${project.version},
org.apache.stanbol.enhancer.nlp.sentiment;version=${project.version},
org.apache.stanbol.enhancer.nlp.utils;version=${project.version}
</Export-Package>
@@ -80,6 +83,14 @@
<plugin>
<groupId>org.apache.felix</groupId>
<artifactId>maven-scr-plugin</artifactId>
+ <configuration>
+ <!-- Need to exclude the Pos enum because it causes an
+ ArrayIndexOutOfBoundsException - most likely because
+ of character encoding issues -->
+ <sourceExcludes>
+ **/enhancer/nlp/pos/Pos.java
+ </sourceExcludes>
+ </configuration>
</plugin>
</plugins>
</build>
@@ -88,25 +99,23 @@
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.servicesapi</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
</dependency>
<!-- test dependencies -->
<dependency>
<groupId>org.apache.stanbol</groupId>
<artifactId>org.apache.stanbol.enhancer.core</artifactId>
- <version>0.10.0-incubating-SNAPSHOT</version>
+ <version>0.10.0-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -119,7 +128,5 @@
<artifactId>slf4j-simple</artifactId>
<scope>test</scope>
</dependency>
-
</dependencies>
-
</project>
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java Mon Nov 26 11:39:25 2012
@@ -4,9 +4,10 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.model.Chunk;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
+import org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures;
+import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
-import org.apache.stanbol.enhancer.nlp.sentiment.SentimentTag;
/**
* Defines the {@link Annotation} constants typically used by NLP components
@@ -19,7 +20,11 @@ public interface NlpAnnotations {
*/
Annotation<String,PosTag> POS_ANNOTATION = new Annotation<String,PosTag>(
"stanbol.enhancer.nlp.pos", PosTag.class);
-
+ /**
+ *
+ */
+ Annotation<String,NerTag> NER_ANNOTATION = new Annotation<String,NerTag>(
+ "stanbol.enhancer.nlp.ner", NerTag.class);
/**
* The Phrase {@link Annotation} added by chunker to a group of
@@ -33,14 +38,39 @@ public interface NlpAnnotations {
* The Sentiment {@link Annotation} added by a sentiment tagger typically
* to single {@link Token}s that do carry a positive or negative sentiment.
*/
- Annotation<String,SentimentTag> SENTIMENT_ANNOTATION = new Annotation<String,SentimentTag>(
- "stanbol.enhancer.nlp.sentiment", SentimentTag.class);
+ Annotation<String,Double> SENTIMENT_ANNOTATION = new Annotation<String,Double>(
+ "stanbol.enhancer.nlp.sentiment", Double.class);
/**
- * The Lemma {@link Annotation} for a word. Typically used for
- * {@link Token}s. The value is the {@link String} representing the
- * Lemma of the Word
+ * {@link Annotation} representing the Morphological analysis of a word.
+ * Typically used on {@link Token}s.<p>
+ * The {@link MorphoFeatures} defines at least the Lemma and [1..*] POS tags.
+ * NOTE that the POS tag information does not assign a Tag to the {@link Token},
+ * but rather specifies that if the Token is classified by a {@link #POS_ANNOTATION}
+ * to be of one of the Tags the definitions of this {@link MorphoFeatures} can
+ * be applied.
*/
- Annotation<String,String> LEMMA_ANNOTATION = new Annotation<String,String>(
- "stanbol.enhancer.nlp.lemma",String.class);
-
+ Annotation<String,MorphoFeatures> MORPHO_ANNOTATION = new Annotation<String,MorphoFeatures>(
+ "stanbol.enhancer.nlp.morpho",MorphoFeatures.class);
+
+ /*
+ * Currently only used as part of MorphoFeatures
+ */
+// Annotation<String,CaseTag> CASE_ANNOTATION = new Annotation<String,CaseTag>(
+// "stanbol.enhancer.nlp.morpho.case",CaseTag.class);
+//
+// Annotation<String,GenderTag> GENDER_ANNOTATION = new Annotation<String,GenderTag>(
+// "stanbol.enhancer.nlp.morpho.gender",GenderTag.class);
+//
+// Annotation<String,NumberTag> NUMBER_ANNOTATION = new Annotation<String,NumberTag>(
+// "stanbol.enhancer.nlp.morpho.number",NumberTag.class);
+//
+// Annotation<String,PersonTag> PERSON_ANNOTATION = new Annotation<String,PersonTag>(
+// "stanbol.enhancer.nlp.morpho.person",PersonTag.class);
+//
+// Annotation<String,TenseTag> TENSE_ANNOTATION = new Annotation<String,TenseTag>(
+// "stanbol.enhancer.nlp.morpho.tense",TenseTag.class);
+//
+// Annotation<String,VerbMoodTag> VERB_MOOD_ANNOTATION = new Annotation<String,VerbMoodTag>(
+// "stanbol.enhancer.nlp.morpho.verb-mood",VerbMoodTag.class);
+
}
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/annotation/Value.java Mon Nov 26 11:39:25 2012
@@ -8,7 +8,7 @@ import java.util.Set;
public final class Value<T> {
/**
- * For Values that do not have a probability we use {@link Double#NaN}
+ * For Values that do not have a probability we use <code>-1.0d</code>
*/
public static final double UNKNOWN_PROBABILITY = -1.0d;
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java Mon Nov 26 11:39:25 2012
@@ -9,8 +9,6 @@ import org.apache.felix.scr.annotations.
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.servicesapi.Blob;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.osgi.framework.Constants;
@@ -22,36 +20,6 @@ import org.osgi.framework.Constants;
public class AnalysedTextFactoryImpl extends AnalysedTextFactory {
@Override
- public AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException {
- ci.getLock().readLock().lock();
- try {
- AnalysedText existing = ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class);
- throw new IllegalStateException("The AnalysedText ContentPart already exists (impl: "
- +existing.getClass().getSimpleName()+"| blob: "+existing.getBlob().getMimeType()+")");
- }catch (NoSuchPartException e) {
- //this is the expected case
- }catch (ClassCastException e) {
- throw new IllegalStateException("A ContentPart with the URI '"
- + AnalysedText.ANALYSED_TEXT_URI+"' already exists but the parts "
- + "type is not compatible with "+AnalysedText.class.getSimpleName()+"!",
- e);
- } finally {
- ci.getLock().readLock().unlock();
- }
- //create the Analysed text
- AnalysedText at = createAnalysedText(blob);
- ci.getLock().writeLock().lock();
- try {
- //NOTE: there is a possibility that an other thread has added
- // the contentpart
- ci.addPart(AnalysedText.ANALYSED_TEXT_URI, at);
- } finally {
- ci.getLock().writeLock().unlock();
- }
- return at;
- }
-
- @Override
public AnalysedText createAnalysedText(Blob blob) throws IOException {
String text = ContentItemHelper.getText(blob);
return new AnalysedTextImpl(blob,text);
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SpanImpl.java Mon Nov 26 11:39:25 2012
@@ -134,7 +134,8 @@ public abstract class SpanImpl extends A
@Override
public int compareTo(Span o) {
- if(!context.equals(o.getContext())){
+ if(context != null && o.getContext() != null &&
+ !context.equals(o.getContext())){
log.warn("Comparing Spans with different Context. This is not an " +
"intended usage of this class as start|end|type parameters " +
"do not have a natural oder over different texts.");
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java Mon Nov 26 11:39:25 2012
@@ -1,8 +1,13 @@
package org.apache.stanbol.enhancer.nlp.pos;
-import org.apache.stanbol.enhancer.nlp.Tag;
-import org.apache.stanbol.enhancer.nlp.TagSet;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Set;
+
import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
/**
@@ -16,53 +21,185 @@ import org.apache.stanbol.enhancer.servi
*/
public class PosTag extends Tag<PosTag>{
- private final LexicalCategory category;
/**
- * Creates a new POS tag for the parsed tag. The created Tag is not
- * assigned to any {@link LexicalCategory}.<p> This constructor can be used
- * by {@link EnhancementEngine}s that encounter an Tag they do not know
- * (e.g. that is not defined by the configured {@link TagSet}).<p>
- * @param tag the Tag
+ * The {@link LexicalCategory LexicalCategories} applying to this PosTag
+ */
+ private final Set<LexicalCategory> category;
+ /**
+ * The mapped {@link Pos} tags. Empty if none are mapped
+ */
+ private final Set<Pos> pos;
+ /**
+ * NOTE: an empty set (not <code>null</code>) if {@link #pos} is empty!
+ */
+ private final Set<Pos> posHierarchy;
+// /**
+// * Creates a new POS tag for the parsed tag. The created Tag is not
+// * assigned to any {@link LexicalCategory}.<p> This constructor can be used
+// * by {@link EnhancementEngine}s that encounter an Tag they do not know
+// * (e.g. that is not defined by the configured {@link TagSet}).<p>
+// * @param tag the Tag
+// * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+// * or empty.
+// */
+// public PosTag(String tag){
+// this(tag,(LexicalCategory)null);
+// }
+ /**
+ * Creates a PosTag that is assigned to a {@link LexicalCategory}
+ * @param tag the tag
+ * @param category the lexical categor(ies) mapped to the tag
* @throws IllegalArgumentException if the parsed tag is <code>null</code>
* or empty.
*/
- public PosTag(String tag){
- this(tag,null);
+ public PosTag(String tag,LexicalCategory...category){
+ super(tag);
+ this.category = EnumSet.noneOf(LexicalCategory.class);
+ if(category != null){
+ this.category.addAll(Arrays.asList(category));
+ }
+ this.pos = Collections.emptySet();
+ this.posHierarchy = Collections.emptySet();
}
/**
* Creates a PosTag that is assigned to a {@link LexicalCategory}
* @param tag the tag
- * @param category the lexical category or <code>null</code> if not known
+ * @param pos a concrete {@link Pos} mapped to the string
+ * @param furtherPos allows to add additional {@link Pos} mappings
* @throws IllegalArgumentException if the parsed tag is <code>null</code>
* or empty.
*/
- public PosTag(String tag,LexicalCategory category){
+ public PosTag(String tag,Pos pos,Pos...furtherPos){
+ this(tag, null,pos,furtherPos);
+ }
+
+ public PosTag(String tag,LexicalCategory category, Pos pos,Pos...furtherPos){
super(tag);
- this.category = category;
+ if(pos != null){
+ if(furtherPos == null || furtherPos.length < 1){
+ this.pos = Collections.singleton(pos);
+ this.posHierarchy = pos.hierarchy();
+ if(category == null){
+ this.category = pos.categories();
+ } else {
+ this.category = EnumSet.of(category);
+ this.category.addAll(pos.categories());
+ }
+ } else { // in case of multiple Pos Tags
+ this.pos = EnumSet.of(pos,furtherPos);
+ //we need to collect categories
+ this.category = category == null ?
+ EnumSet.noneOf(LexicalCategory.class) :
+ EnumSet.of(category);
+ //and the union over the pos parents
+ this.posHierarchy = EnumSet.noneOf(Pos.class);
+ for(Pos p : this.pos){
+ this.posHierarchy.addAll(p.hierarchy());
+ this.category.addAll(p.categories());
+ }
+ }
+ } else {
+ if(furtherPos != null && furtherPos.length > 0){
+ throw new IllegalArgumentException("furtherPos parameter MUST BE NULL "
+ + "or empty if the pos parameter is NULL!");
+ }
+ this.category = category == null ?
+ Collections.EMPTY_SET : Collections.singleton(category);
+ this.pos = Collections.emptySet();
+ this.posHierarchy = Collections.emptySet();
+ }
}
/**
- * The LecxialCategory of this tag (if known)
- * @return the category or <code>null</code> if not mapped to any
+ * The {@link LexicalCategory LexicalCategories} of this tag
+ * @return the {@link LexicalCategory LexicalCategories} or an
+ * empty {@link Set} if the string {@link #getTag() tag} is
+ * not mapped.
*/
- public LexicalCategory getCategory(){
+ public Set<LexicalCategory> getCategories(){
return category;
}
+ /**
+ * Checks if this {@link PosTag} is mapped to the parsed
+ * {@link LexicalCategory}
+ * @param category the category
+ * @return <code>true</code> if this PosTag is mapped to
+ * the parsed category.
+ */
+ public boolean hasCategory(LexicalCategory category){
+ return this.category.contains(category);
+ }
+
+ /**
+ * Checks if the {@link PosTag} is of the parsed {@link Pos}
+ * tag. This also considers the transitive hierarchy of
+ * the {@link Pos} enum.
+ * @param pos the {@link Pos} to check
+ * @return <code>true</code> if this PosTag is mapped to
+ * the parsed {@link Pos}.
+ */
+ public boolean hasPos(Pos pos){
+ return this.pos.isEmpty() ? false :
+ posHierarchy.contains(pos);
+ }
+ /**
+ * Returns <code>true</code> if this PosTag is mapped to a
+ * {@link LexicalCategory} or a {@link Pos} type as defined
+ * by the <a href="">Olia</a> Ontology
+ * @return
+ */
+ public boolean isMapped() {
+ return !category.isEmpty();
+ }
+
+ /**
+ * Getter for the {@link Pos} mapped to this PosTag
+ * @return the {@link Pos} mapped to the string
+ * {@link #getTag() tag} or an empty set if not
+ * mapped. These are the directly mapped {@link Pos} types
+ * and do not include the parent Pos types.
+ */
+ public Set<Pos> getPos() {
+ return pos;
+ }
+
+ public Set<Pos> getPosHierarchy(){
+ return posHierarchy;
+ }
+
@Override
public String toString() {
- return String.format("POS %s (%s)", tag,
- category == null ? "none" : category.name());
+ StringBuilder sb = new StringBuilder("pos: ");
+ sb.append(tag);
+ if(pos != null || !category.isEmpty()){
+ sb.append('(');
+ if(!pos.isEmpty()){
+ if(pos.size() == 1){
+ sb.append(pos.iterator().next());//.name());
+ } else {
+ sb.append(pos);
+ }
+ sb.append('|');
+ }
+ if(category.size() == 1){
+ sb.append(category.iterator().next());//.name());
+ } else {
+ sb.append(category);
+ }
+ sb.append(')');
+ }
+ return sb.toString();
}
@Override
public int hashCode() {
- return tag.hashCode();
+ return tag.hashCode() + category.hashCode() + pos.hashCode();
}
@Override
public boolean equals(Object obj) {
return super.equals(obj) && obj instanceof PosTag &&
- (category == null && ((PosTag)obj).category == null) ||
- (category != null && category.equals(((PosTag)obj).category));
+ category.equals(((PosTag)obj).category) &&
+ pos.equals(((PosTag)obj).pos);
}
}
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java Mon Nov 26 11:39:25 2012
@@ -9,20 +9,60 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.osgi.framework.ServiceReference;
import org.osgi.service.cm.ConfigurationException;
/**
- * Utility that supports the configuration of languages in the form of
+ * Utility that supports the configuration of languages and language
+ * specific parameters.
+ * <h3>Language configuration</h3>
+ * Languages are configured as follows:
* <pre>
- * de,en
- * </pre>
- * of
+ * de,en </pre>
+ * or
* <pre>
- * !fr,!cn,*
- * </pre>
+ * !fr,!cn,*</pre>
+ * The '<code>!{lang}</code>' is used to {@link #getExplicitlyExcluded()
+ * explicitly exclude} a language. '<code>*</code>' can be used to
+ * specify that all languages are allowed. '<code>{lang}</code>'
+ * {@link #getExplicitlyIncluded() explicitly includes} a language.
+ * '<code>,</code>' is used as separator between multiple configurations,
+ * however this class also supports the usage of <code>String[]</code> and
+ * {@link Collection} (in case of Collections the
+ * {@link Object#toString()} method is used to obtain the configuration).
+ * If an array or a collection is used for the configuration, then comma
+ * is NOT used as separator!
* <p>
- * Instead of comma separated Strings also <code>String[]</code> and
- * {@link Collection} are supported.
+ * <h3>Parameter Support</h3>
+ * This class supports the parsing of language specific parameters by
+ * the following syntax:
+ * <pre>
+ * {language};{param-name}={param-value};{param-name}={param-value}</pre>
+ * Parameters that apply to all {languages} with no configuration can be
+ * either set for the '<code>*</code>' or an empty language tag. Here
+ * is an example
+ * <pre>
+ * *;myParam=myValue
+ * ;myParam=myValue</pre>
+ * Multiple default configurations will cause a {@link ConfigurationException}.
+ * <p>
+ * The {@link #getParameters(String)} and {@link #getParameters(String,String)}
+ * will return values of the {@link #getDefaultParameters()} if no
+ * language specific parameters are present for the requested language. However
+ * the default configuration is not merged but replaced by language specific
+ * parameter declarations. Applications that want to use the default configuration
+ * as fallback to language specific settings can implement this by
+ * using the properties provided by {@link #getDefaultParameters()}.
+ * <p>
+ * <b>Notes</b> <ul>
+ * <li>only the first occurrence of '<code>=</code>' within a
+ * parameter is used as separator between the param name and value. This
+ * means that the {param-value} is allowed to contain '='.
+ * <li>in case a comma separated string is used for the language
+ * configuration, parameter declarations MUST NOT contain
+ * '<code>,</code>' (comma) values. In case a <code>String[]</code> or a
+ * {@link Collection} is used this is not the case.
+ * </ul>
*
* @author Rupert Westenthaler
*
@@ -37,7 +77,7 @@ public class LanguageConfiguration {
private Map<String,Map<String,String>> configuredLanguages = new HashMap<String,Map<String,String>>();
private Set<String> excludedLanguages = new HashSet<String>();
private boolean allowAll;
-
+ private Map<String,String> defaultParameters = EMPTY_PARAMS;
@SuppressWarnings("unchecked")
public LanguageConfiguration(String property, String[] defaultConfig){
if(property == null || property.isEmpty()){
@@ -60,12 +100,40 @@ public class LanguageConfiguration {
/**
* Reads the config for the configured {@link #getProperty() property}
- * from the parsed configuration. This supports <code>String[]</code>,
- * <code>Collection</code>, and comma separated Strings
- * @param configuration
+ * from the parsed configuration. <p>
+ * This implementation supports
+ * <code>null</code> (sets the default), <code>String[]</code>,
+ * <code>Collections<?></code> (Object{@link #toString() toString()} is called
+ * on members) and comma separated {@link String}.
+ * @param configuration the configuration
*/
public void setConfiguration(Dictionary<?,?> configuration) throws ConfigurationException {
- Object value = configuration.get(property);
+ processConfiguration(configuration.get(property));
+ }
+ /**
+ * Reads the configuration for the configured {@link #getProperty()} from
+ * the properties of the parsed {@link ServiceReference}.<p>
+ * This implementation supports
+ * <code>null</code> (sets the default), <code>String[]</code>,
+ * <code>Collections<?></code> (Object{@link #toString() toString()} is called
+ * on members) and comma separated {@link String}.
+ * @param ref the ServiceReference
+ * @throws ConfigurationException
+ */
+ public void setConfiguration(ServiceReference ref) throws ConfigurationException {
+ processConfiguration(ref.getProperty(property));
+ }
+
+ /**
+ * Reads the configuration for the parsed value. <p>
+ * This implementation supports
+ * <code>null</code> (sets the default), <code>String[]</code>,
+ * <code>Collections<?></code> (Object{@link #toString() toString()} is called
+ * on members) and comma separated {@link String}.
+ * @param value the value
+ * @throws ConfigurationException if the configuration is invalid
+ */
+ protected void processConfiguration(Object value) throws ConfigurationException {
Collection<?> config;
if(value == null){
config = defaultConfig;
@@ -88,6 +156,10 @@ public class LanguageConfiguration {
if(config == null){
config = defaultConfig;
}
+ //reset values
+ configuredLanguages.clear();
+ excludedLanguages.clear();
+ defaultParameters = EMPTY_PARAMS; //do not change values in multi threaded environments
for(Object value : config) {
if(value == null){
continue; //ignore null values
@@ -96,7 +168,7 @@ public class LanguageConfiguration {
int sepIndex = line.indexOf(';');
String lang = sepIndex < 0 ? line : line.substring(0, sepIndex).trim();
lang = lang.toLowerCase();
- if(lang.charAt(0) == '!'){ //exclude
+ if(lang.length() > 0 && lang.charAt(0) == '!'){ //exclude
lang = lang.substring(1);
if(configuredLanguages.containsKey(lang)){
throw new ConfigurationException(property,
@@ -106,6 +178,7 @@ public class LanguageConfiguration {
excludedLanguages.add(lang);
} else if("*".equals(lang)){
allowAll = true;
+ parsedDefaultParameters(line, sepIndex);
} else if(!lang.isEmpty()){
if(excludedLanguages.contains(lang)){
throw new ConfigurationException(property,
@@ -118,10 +191,32 @@ public class LanguageConfiguration {
configuredLanguages.put(lang,sepIndex >= 0 && sepIndex < line.length()-2 ?
parseParameters(line.substring(sepIndex, line.length()).trim()) :
EMPTY_PARAMS);
+ } else { //language tag is empty (line starts with a ';')
+ //this indicates that this is used to configure the default parameters
+ parsedDefaultParameters(line, sepIndex);
}
}
}
/**
+ * Parses the {@link #defaultParameters} and also checks that no more than
+ * one (non-empty) such configuration is present
+ * @param line the current line
+ * @param sepIndex the index of first ';' in the configuration line
+ * @throws ConfigurationException if multiple default configurations are present or
+ * if the parameters are illegally formatted.
+ */
+ private void parsedDefaultParameters(String line, int sepIndex) throws ConfigurationException {
+ if(!defaultParameters.isEmpty()){
+ throw new ConfigurationException(property, "Language Configuration MUST NOT "
+ + "contain multiple default property configurations. This are configurations "
+ + "of properties for the wildcard '*;{properties}' or the empty language "
+ + "';{properties}'.");
+ }
+ defaultParameters = sepIndex >= 0 && sepIndex < line.length()-2 ?
+ parseParameters(line.substring(sepIndex, line.length()).trim()) :
+ EMPTY_PARAMS;
+ }
+ /**
* Parses optional parameters <code>{key}[={value}];{key2}[={value2}]</code>. Using
* the same key multiple times will override the previouse value
* @param paramString
@@ -164,13 +259,54 @@ public class LanguageConfiguration {
configuredLanguages.containsKey(language);
}
/**
+ * The explicitly configured languages
+ * @return
+ */
+ public Set<String> getExplicitlyIncluded(){
+ return configuredLanguages.keySet();
+ }
+ /**
+ * The explicitly excluded (e.g. !de) languages
+ * @return
+ */
+ public Set<String> getExplicitlyExcluded(){
+ return excludedLanguages;
+ }
+ /**
+ * If the '*' was used in the configuration to allow
+ * all languages.
+ * @return
+ */
+ public boolean useWildcard(){
+ return allowAll;
+ }
+
+ /**
* Returns parsed parameters if <code>{@link #isLanguage(String)} == true</code>
* @param language the language
* @return the parameters or <code>null</code> if none or the parsed language
* is not active.
*/
public Map<String,String> getParameters(String language){
- return isLanguage(language) ? configuredLanguages.get(language) : null;
+ if(isLanguage(language)){
+ Map<String,String> params = configuredLanguages.get(language);
+ if((params == null || params.isEmpty()) && //if no or empty parameters
+ !defaultParameters.isEmpty()){ //and there are defaults
+ params = defaultParameters;
+ } else if(params == null){ //do not return NULL
+ params = EMPTY_PARAMS;
+ }
+ return params;
+ } else {
+ return null; //to indicate the parsed language is not active
+ }
+ }
+ /**
+ * Getter for the default parameters
+ * @return the default parameters, an empty map if none.
+ */
+ public Map<String,String> getDefaultParameters() {
+ return defaultParameters;
}
/**
@@ -179,7 +315,7 @@ public class LanguageConfiguration {
public void setDefault() {
try {
parseConfiguration(defaultConfig);
- }catch (ConfigurationException e) {
+ } catch (ConfigurationException e) {
// can not happen else the default config is already validated
// within the constructor
}
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java Mon Nov 26 11:39:25 2012
@@ -24,14 +24,16 @@ import org.apache.stanbol.enhancer.nlp.N
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Chunk;
import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotated;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
-import org.apache.stanbol.enhancer.nlp.ontology.SsoOntology;
-import org.apache.stanbol.enhancer.nlp.ontology.StringOntology;
+import org.apache.stanbol.enhancer.nlp.nif.SsoOntology;
+import org.apache.stanbol.enhancer.nlp.nif.StringOntology;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -41,6 +43,18 @@ public final class NIFHelper {
private static final LiteralFactory lf = LiteralFactory.getInstance();
private NIFHelper(){}
+
+ public static final Map<SpanTypeEnum,UriRef> SPAN_TYPE_TO_SSO_TYPE;
+ static {
+ Map<SpanTypeEnum,UriRef> mapping = new EnumMap<SpanTypeEnum,UriRef>(SpanTypeEnum.class);
+ //mapping.put(SpanTypeEnum.Text, null);
+ //mapping.put(SpanTypeEnum.TextSection, null);
+ mapping.put(SpanTypeEnum.Sentence, SsoOntology.Sentence.getUri());
+ mapping.put(SpanTypeEnum.Chunk, SsoOntology.Phrase.getUri());
+ mapping.put(SpanTypeEnum.Token, SsoOntology.Word.getUri());
+ SPAN_TYPE_TO_SSO_TYPE = Collections.unmodifiableMap(mapping);
+ }
+
/**
* Read-only map that maps from the {@link LexicalCategory} to the OLIA
* Concept representing the Phrase (e.g. {@link LexicalCategory#Noun} maps
@@ -225,9 +239,15 @@ public final class NIFHelper {
public static void writePos(MGraph graph, Annotated annotated, UriRef segmentUri) {
Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
if(posTag != null){
- if(posTag.value().getCategory() != null){
- graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
- posTag.value().getCategory().getUri()));
+ if(posTag.value().isMapped()){
+ for(Pos pos : posTag.value().getPos()){
+ graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
+ pos.getUri()));
+ }
+ for(LexicalCategory cat : posTag.value().getCategories()){
+ graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
+ cat.getUri()));
+ }
}
graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
lf.createTypedLiteral(posTag.value().getTag())));
Copied: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java (from r1388016, incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java)
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java?p2=stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java&p1=incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java&r1=1388016&r2=1413560&rev=1413560&view=diff
==============================================================================
--- incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NlpEngineHelper.java Mon Nov 26 11:39:25 2012
@@ -3,10 +3,13 @@ package org.apache.stanbol.enhancer.nlp.
import static java.util.Collections.singleton;
import java.io.IOException;
+import java.util.Dictionary;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.nlp.NlpProcessingRole;
+import org.apache.stanbol.enhancer.nlp.NlpServiceProperties;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
@@ -14,6 +17,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.slf4j.Logger;
@@ -99,6 +103,30 @@ public final class NlpEngineHelper {
}
}
/**
+ * Used in {@link #canEnhance(ContentItem)} to check if a {@link ContentItem}
+ * should be processed based on the language configuration of this engine.
+ * @param engine the {@link EnhancementEngine} calling this method
+ * @param languageConfiguration the language configuration
+ * @param language the language
+ * @param exception <code>false</code> if used in {@link #canEnhance(ContentItem)}
+ * and <code>true</code> when called from {@link #computeEnhancements(ContentItem)}
+ * @return the state
+ * @throws IllegalStateException if exception is <code>true</code> and the
+ * language is not configured as being processed.
+ */
+ public static boolean isLangaugeConfigured(EnhancementEngine engine, LanguageConfiguration languageConfiguration, String language, boolean exception){
+ boolean state = languageConfiguration.isLanguage(language);
+ if(!state && exception){
+ throw new IllegalStateException("Language "+language+" is not included "
+ + "by the LanguageConfiguration of this engine (name "+ engine.getName()
+ + "). As this is also checked in canEnhancer this may indicate an Bug in the "
+ + "used EnhancementJobManager!");
+ } else {
+ return state;
+ }
+ }
+
+ /**
* Retrieves - or if not present - creates the {@link AnalysedText} content
* part for the parsed {@link ContentItem}. If the {@link Blob} with the
* mime type '<code>text/plain</code>' is present this method
@@ -179,5 +207,30 @@ public final class NlpEngineHelper {
return null;
}
}
-
+ /**
+ * Parses the {@link NlpProcessingRole} typically provided by the
+ * {@link ServiceProperties#getServiceProperties()} provided by some
+ * EnhancementEngines.<p>
+ * This supports both {@link NlpProcessingRole} as well as String values
+ * using the {@link NlpProcessingRole#name()}.
+ * @param properties the properties (typically retrieved from the
+ * {@link ServiceProperties#getServiceProperties()} method)
+ * @return the NLP processing role or <code>null</code> if not present OR
+ * an error while parsing.
+ */
+ public static NlpProcessingRole getNlpProcessingRole(Map<String,Object> properties){
+ Object value = properties.get(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE);
+ if(value instanceof NlpProcessingRole){
+ return (NlpProcessingRole)value;
+ } else if(value != null){
+ try {
+ return NlpProcessingRole.valueOf(value.toString());
+ } catch (IllegalArgumentException e) {
+ log.warn("Unknown NLP processing role {} -> return null",value);
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
}
Propchange: stanbol/trunk/enhancer/generic/servicesapi/
------------------------------------------------------------------------------
Merged /incubator/stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi:r1386989-1388016
Merged /stanbol/branches/stanbol-nlp-processing/enhancer/generic/servicesapi:r1388017-1413353
Modified: stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java (original)
+++ stanbol/trunk/enhancer/generic/servicesapi/src/main/java/org/apache/stanbol/enhancer/servicesapi/ServiceProperties.java Mon Nov 26 11:39:25 2012
@@ -49,7 +49,15 @@ public interface ServiceProperties {
/**
* Ordering values < {@link ServiceProperties#ORDERING_PRE_PROCESSING} and
* >= this value indicate, that an enhancement engine performs operations
- * that are only dependent on the parsed content.
+ * that are only dependent on the parsed content.<p>
+ * <b>NOTE:</b> the NLP processing specific orderings that are defined
+ * within this span
+ * @see #ORDERING_NLP_LANGAUGE_DETECTION
+ * @see #ORDERING_NLP_SENTENCE_DETECTION
+ * @see #ORDERING_NLP_TOKENIZING
+ * @see #ORDERING_NLP_POS
+ * @see #ORDERING_NLP_CHUNK
+ * @see #ORDERING_NLP_LEMMATIZE
*/
Integer ORDERING_CONTENT_EXTRACTION = 100;
@@ -76,5 +84,46 @@ public interface ServiceProperties {
* operations on existing enhancements.
*/
Integer ORDERING_POST_PROCESSING = -100;
-
+
+ /* -------
+ * NLP processing orderings (all within the ORDERING_CONTENT_EXTRACTION range)
+ * -------
+ */
+ /**
+ * Ordering values < {@link #ORDERING_PRE_PROCESSING} and >=
+ * {@link #ORDERING_NLP_LANGAUGE_DETECTION} are reserved for engines that detect
+ * the language of the content
+ */
+ Integer ORDERING_NLP_LANGAUGE_DETECTION = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 90;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_LANGAUGE_DETECTION} and >=
+ * {@link #ORDERING_NLP_SENTENCE_DETECTION} are reserved for engines that extract
+ * sections within the text content
+ */
+ Integer ORDERING_NLP_SENTENCE_DETECTION = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 80;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_SENTENCE_DETECTION} and >=
+ * {@link #ORDERING_NLP_TOKENIZING} are reserved for engines that tokenize
+ * the text
+ */
+ Integer ORDERING_NLP_TOKENIZING = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 70;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_TOKENIZING} and >=
+ * {@link #ORDERING_NLP_POS} are reserved for engines that perform
+ * POS (Part of Speech) tagging
+ */
+ Integer ORDERING_NLP_POS = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 60;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_POS} and >=
+ * {@link #ORDERING_NLP_CHUNK} are reserved for engines that annotate
+ * Chunks (such as Noun Phrases) in a text.
+ */
+ Integer ORDERING_NLP_CHUNK = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 50;
+ /**
+ * Ordering values < {@link #ORDERING_NLP_CHUNK} and >=
+ * {@link #ORDERING_NLP_LEMMATIZE} are reserved for engines that lemmatize
+ * texts.<p>
+ * TODO: maybe this should be the same as {@link #ORDERING_NLP_TOKENIZING}
+ */
+ Integer ORDERING_NLP_LEMMATIZE = ServiceProperties.ORDERING_CONTENT_EXTRACTION + 40;
}
Modified: stanbol/trunk/enhancer/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/pom.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/enhancer/pom.xml (original)
+++ stanbol/trunk/enhancer/pom.xml Mon Nov 26 11:39:25 2012
@@ -63,6 +63,7 @@
<module>generic/core</module>
<module>generic/test</module>
<module>generic/rdfentities</module>
+ <module>generic/nlp</module>
<module>jobmanager</module>
<module>chain/allactive</module>
Modified: stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerConfigurationTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerConfigurationTest.java?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerConfigurationTest.java (original)
+++ stanbol/trunk/integration-tests/src/test/java/org/apache/stanbol/enhancer/it/EnhancerConfigurationTest.java Mon Nov 26 11:39:25 2012
@@ -22,7 +22,61 @@ import org.junit.Test;
public class EnhancerConfigurationTest extends EnhancerTestBase {
+ public static final String[] EXPECTED_ENGINES = new String[]{
+ "<rdf:Description rdf:about=\"http://localhost:.*/enhancer\">",
+ "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#Enhancer\"/>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpediaLinking\"/>",
+ "<rdfs:label>dbpediaLinking</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langid\"/>",
+ "<rdfs:label>langid</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langdetect\"/>",
+ "<rdfs:label>langdetect</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/tika\"/>",
+ "<rdfs:label>tika</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/opennlp-sentence\"/>",
+ "<rdfs:label>opennlp-sentence</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/opennlp-token\"/>",
+ "<rdfs:label>opennlp-token</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/opennlp-pos\"/>",
+ "<rdfs:label>opennlp-pos</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/opennlp-ner\"/>",
+ "<rdfs:label>opennlp-ner</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/opennlp-chunker\"/>",
+ "<rdfs:label>opennlp-chunker</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/sentiment-wordclassifier\"/>",
+ "<rdfs:label>sentiment-wordclassifier</rdfs:label>",
+ "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/xmpextractor\"/>",
+ "<rdfs:label>xmpextractor</rdfs:label>",
+//NOT AVAILABLE DURING TESTS, BECAUSE OF OFFLINE MODE!
+// "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpspotlightdisambiguate\"/>",
+// "<rdfs:label>dbpspotlightdisambiguate</rdfs:label>",
+// "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpspotlightannotate\"/>",
+// "<rdfs:label>dbpspotlightannotate</rdfs:label>",
+// "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpspotlightcandidates\"/>",
+// "<rdfs:label>dbpspotlightcandidates</rdfs:label>",
+// "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpspotlightspot\"/>",
+// "<rdfs:label>dbpspotlightspot</rdfs:label>",
+ "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementEngine\"/>"
+ };
+ public static final String[] EXPECTED_CHAINS = new String[]{
+ "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/default\"/>",
+ "<rdfs:label>default</rdfs:label>",
+ "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/language\"/>",
+ "<rdfs:label>language</rdfs:label>",
+ "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/dbpedia-proper-noun\"/>",
+ "<rdfs:label>dbpedia-proper-noun</rdfs:label>",
+ "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/dbpedia-spotlight\"/>",
+ "<rdfs:label>dbpedia-spotlight</rdfs:label>",
+ "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/all-active\"/>",
+ "<rdfs:label>all-active</rdfs:label>",
+ "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementChain\"/>",
+ };
+ public static final String[] EXPECTED_CONFIG = new String[EXPECTED_CHAINS.length+EXPECTED_ENGINES.length];
+ static {
+ System.arraycopy(EXPECTED_CHAINS, 0, EXPECTED_CONFIG, 0, EXPECTED_CHAINS.length);
+ System.arraycopy(EXPECTED_ENGINES, 0, EXPECTED_CONFIG, EXPECTED_CHAINS.length, EXPECTED_ENGINES.length);
+ }
@Test
public void testEnhancerConfig() throws IOException{
@@ -31,21 +85,7 @@ public class EnhancerConfigurationTest e
.withHeader("Accept","application/rdf+xml")
)
.assertStatus(200)
- .assertContentRegexp(
- "<rdf:Description rdf:about=\"http://localhost:.*/enhancer\">",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#Enhancer\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpediaLinking\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langid\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langdetect\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/tika\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/ner\"/>",
- "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/default\"/>",
- "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/language\"/>",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementChain\"/>",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementEngine\"/>",
- "<rdfs:label>ner</rdfs:label>",
- "<rdfs:label>language</rdfs:label>"
- );
+ .assertContentRegexp(EXPECTED_CONFIG);
}
@Test
public void testEngineConfig() throws IOException{
@@ -54,17 +94,7 @@ public class EnhancerConfigurationTest e
.withHeader("Accept","application/rdf+xml")
)
.assertStatus(200)
- .assertContentRegexp(
- "<rdf:Description rdf:about=\"http://localhost:.*/enhancer\">",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#Enhancer\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/dbpediaLinking\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langid\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/langdetect\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/tika\"/>",
- "<j.0:hasEngine rdf:resource=\"http://localhost:.*/enhancer/engine/ner\"/>",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementEngine\"/>",
- "<rdfs:label>ner</rdfs:label>"
- );
+ .assertContentRegexp(EXPECTED_ENGINES);
}
@Test
public void testChainConfig() throws IOException{
@@ -73,14 +103,7 @@ public class EnhancerConfigurationTest e
.withHeader("Accept","application/rdf+xml")
)
.assertStatus(200)
- .assertContentRegexp(
- "<rdf:Description rdf:about=\"http://localhost:.*/enhancer\">",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#Enhancer\"/>",
- "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/default\"/>",
- "<j.0:hasChain rdf:resource=\"http://localhost:.*/enhancer/chain/language\"/>",
- "<rdf:type rdf:resource=\"http://stanbol.apache.org/ontology/enhancer/enhancer#EnhancementChain\"/>",
- "<rdfs:label>language</rdfs:label>"
- );
+ .assertContentRegexp(EXPECTED_CHAINS);
}
@Test
public void testSparqlConfig() throws IOException{
@@ -109,6 +132,7 @@ public class EnhancerConfigurationTest e
"<binding name=\"chain\">",
"<uri>http://localhost:.*/enhancer/chain/default</uri>",
"<uri>http://localhost:.*/enhancer/chain/language</uri>",
+ "<uri>http://localhost:.*/enhancer/chain/dbpedia-proper-noun</uri>",
"<binding name=\"name\">",
"<literal>default</literal>",
"<literal>language</literal>"
Modified: stanbol/trunk/launchers/bundlelists/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/launchers/bundlelists/pom.xml?rev=1413560&r1=1413559&r2=1413560&view=diff
==============================================================================
--- stanbol/trunk/launchers/bundlelists/pom.xml (original)
+++ stanbol/trunk/launchers/bundlelists/pom.xml Mon Nov 26 11:39:25 2012
@@ -64,6 +64,7 @@
<!-- Data bundles -->
<module>defaultdata</module>
<module>opennlp</module>
+ <module>sentimentdata</module>
<!-- Specific features -->
<module>authentication</module>
Propchange: stanbol/trunk/launchers/bundlelists/sentimentdata/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Nov 26 11:39:25 2012
@@ -0,0 +1,5 @@
+target
+
+.settings
+
+.project
Added: stanbol/trunk/launchers/bundlelists/sentimentdata/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/launchers/bundlelists/sentimentdata/pom.xml?rev=1413560&view=auto
==============================================================================
--- stanbol/trunk/launchers/bundlelists/sentimentdata/pom.xml (added)
+++ stanbol/trunk/launchers/bundlelists/sentimentdata/pom.xml Mon Nov 26 11:39:25 2012
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>stanbol-parent</artifactId>
+ <version>2-SNAPSHOT</version>
+ <relativePath>../../../parent</relativePath>
+ </parent>
+
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.launchers.bundlelists.sentimentdata</artifactId>
+ <version>1.1.0-SNAPSHOT</version>
+ <packaging>partialbundlelist</packaging>
+
+ <name>Apache Stanbol Data: Bundle List for Sentiment Data</name>
+ <description>
+ This bundle list provides sentiment dictionaries for various languages.
+ </description>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.sling</groupId>
+ <artifactId>maven-launchpad-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+</project>
+
Propchange: stanbol/trunk/launchers/bundlelists/sentimentdata/pom.xml
------------------------------------------------------------------------------
svn:executable = *
Added: stanbol/trunk/launchers/bundlelists/sentimentdata/src/main/bundles/list.xml
URL: http://svn.apache.org/viewvc/stanbol/trunk/launchers/bundlelists/sentimentdata/src/main/bundles/list.xml?rev=1413560&view=auto
==============================================================================
--- stanbol/trunk/launchers/bundlelists/sentimentdata/src/main/bundles/list.xml (added)
+++ stanbol/trunk/launchers/bundlelists/sentimentdata/src/main/bundles/list.xml Mon Nov 26 11:39:25 2012
@@ -0,0 +1,32 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<bundles>
+ <startLevel level="29">
+ <!-- Available Sentiment Models -->
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.data.sentiment.sentiws</artifactId>
+ <version>1.1.0-SNAPSHOT</version>
+ </bundle>
+ <bundle>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.data.sentiment.sentiwordnet</artifactId>
+ <version>1.1.0-SNAPSHOT</version>
+ </bundle>
+ </startLevel>
+</bundles>
\ No newline at end of file