You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/10/29 11:14:04 UTC

svn commit: r1403223 - in /stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp: ./ src/main/java/org/apache/stanbol/enhancer/nlp/ src/main/java/org/apache/stanbol/enhancer/nlp/pos/ src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/ src/ma...

Author: rwesten
Date: Mon Oct 29 10:14:03 2012
New Revision: 1403223

URL: http://svn.apache.org/viewvc?rev=1403223&view=rev
Log:
STANBOL-734: Added support for all olia POS types to the Pos Enumeration. The top level is still represented by the LexicalCategory enum. The PosTag class now supports mappings based on the Pos enum. Also support for the POS type hierarchy was added. PosTags can also be assigned to multiple Pos types and/or a combination of LexicalCategories/Pos types. Updated the Spanish, English and German TagSets to use Pos enum Mappings.

Modified:
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/English.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/German.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/Spanish.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
    stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/pom.xml Mon Oct 29 10:14:03 2012
@@ -82,6 +82,14 @@
       <plugin>
         <groupId>org.apache.felix</groupId>
         <artifactId>maven-scr-plugin</artifactId>
+          <configuration>
+            <!-- Need to exclude the Pos enum because it triggers an
+                 ArrayIndexOutOfBoundsException - most likely because
+                 of character encoding issues  -->
+            <sourceExcludes>
+              **/enhancer/nlp/pos/Pos.java
+            </sourceExcludes>
+          </configuration>
       </plugin>
     </plugins>
   </build>

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java Mon Oct 29 10:14:03 2012
@@ -52,23 +52,26 @@ public interface NlpAnnotations {
      */
     Annotation<String,MorphoFeatures> MORPHO_ANNOTATION = new Annotation<String,MorphoFeatures>(
             "stanbol.enhancer.nlp.morpho",MorphoFeatures.class);
-    
-    Annotation<String,CaseTag> CASE_ANNOTATION = new Annotation<String,CaseTag>(
-            "stanbol.enhancer.nlp.morpho.case",CaseTag.class);
 
-    Annotation<String,GenderTag> GENDER_ANNOTATION = new Annotation<String,GenderTag>(
-            "stanbol.enhancer.nlp.morpho.gender",GenderTag.class);
-
-    Annotation<String,NumberTag> NUMBER_ANNOTATION = new Annotation<String,NumberTag>(
-            "stanbol.enhancer.nlp.morpho.number",NumberTag.class);
-    
-    Annotation<String,PersonTag> PERSON_ANNOTATION = new Annotation<String,PersonTag>(
-            "stanbol.enhancer.nlp.morpho.person",PersonTag.class);
-
-    Annotation<String,TenseTag> TENSE_ANNOTATION = new Annotation<String,TenseTag>(
-            "stanbol.enhancer.nlp.morpho.tense",TenseTag.class);
-
-    Annotation<String,VerbMoodTag> VERB_MOOD_ANNOTATION = new Annotation<String,VerbMoodTag>(
-            "stanbol.enhancer.nlp.morpho.verb-mood",VerbMoodTag.class);
+    /* 
+     * Currently only used as part of MorphoFeatures
+     */
+//    Annotation<String,CaseTag> CASE_ANNOTATION = new Annotation<String,CaseTag>(
+//            "stanbol.enhancer.nlp.morpho.case",CaseTag.class);
+//
+//    Annotation<String,GenderTag> GENDER_ANNOTATION = new Annotation<String,GenderTag>(
+//            "stanbol.enhancer.nlp.morpho.gender",GenderTag.class);
+//
+//    Annotation<String,NumberTag> NUMBER_ANNOTATION = new Annotation<String,NumberTag>(
+//            "stanbol.enhancer.nlp.morpho.number",NumberTag.class);
+//    
+//    Annotation<String,PersonTag> PERSON_ANNOTATION = new Annotation<String,PersonTag>(
+//            "stanbol.enhancer.nlp.morpho.person",PersonTag.class);
+//
+//    Annotation<String,TenseTag> TENSE_ANNOTATION = new Annotation<String,TenseTag>(
+//            "stanbol.enhancer.nlp.morpho.tense",TenseTag.class);
+//
+//    Annotation<String,VerbMoodTag> VERB_MOOD_ANNOTATION = new Annotation<String,VerbMoodTag>(
+//            "stanbol.enhancer.nlp.morpho.verb-mood",VerbMoodTag.class);
 
 }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java Mon Oct 29 10:14:03 2012
@@ -2,6 +2,10 @@ package org.apache.stanbol.enhancer.nlp.
 
 import org.apache.clerezza.rdf.core.UriRef;
 
+/**
+ * Lexical categories used by the Stanbol Enhancer NLP module. Defined based on the top level
+ * MorphosyntacticCategory as defined by the <a href="http://olia.nlp2rdf.org/">Olia</a> Ontology.
+ */
 public enum LexicalCategory {
     /**
      * A noun, or noun substantive, is a part of speech (a word or phrase) which can co-occur with
@@ -89,20 +93,23 @@ public enum LexicalCategory {
      * categories. (http://www.ilc.cnr.it/EAGLES96/annotate/node16.html#mp 19.09.06)
      */
     Unique,
-    /**
-     * A numeral is a word, functioning most typically as an adjective or pronoun, 	that expresses a number, 
-     * and relation to the number, such as one of the following: Quantity, Sequence, Frequency, Fraction.
-	 * (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsANumeral.htm 19.09.06)
-     */
-    Numeral,
-    /**
-     * Clitic Element covers only one aspect of the original MULTEXT-East (and ISOcat) definitions of cliticness, i.e., that an element is a clitic
-     */
-    Clitic,
-    /**
-     * Proper nouns (also called proper names) are the names of unique entities. (http://en.wikipedia.org/wiki/Noun 19.09.06)
-     */
-    ProperNoun,;
+//    /**
+//     * A numeral is a word, functioning most typically as an adjective or pronoun, 	that expresses a number, 
+//     * and relation to the number, such as one of the following: Quantity, Sequence, Frequency, Fraction.
+//	 * (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsANumeral.htm 19.09.06)
+//     */
+//    Numeral,
+// not present in OLIA
+//    /**
+//     * Clitic Element covers only one aspect of the original MULTEXT-East (and ISOcat) definitions of 
+//     * cliticness, i.e., that an element is a clitic
+//     */
+//    Clitic,
+//    /**
+//     * Proper nouns (also called proper names) are the names of unique entities. (http://en.wikipedia.org/wiki/Noun 19.09.06)
+//     */
+//    ProperNoun,
+    ;
     static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
 
     UriRef uri;

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/PosTag.java Mon Oct 29 10:14:03 2012
@@ -1,5 +1,10 @@
 package org.apache.stanbol.enhancer.nlp.pos;
 
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Set;
+
 import org.apache.stanbol.enhancer.nlp.model.Token;
 import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
 import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
@@ -16,53 +21,185 @@ import org.apache.stanbol.enhancer.servi
  */
 public class PosTag extends Tag<PosTag>{
     
-    private final LexicalCategory category;
     /**
-     * Creates a new POS tag for the parsed tag. The created Tag is not
-     * assigned to any {@link LexicalCategory}.<p> This constructor can be used
-     * by {@link EnhancementEngine}s that encounter an Tag they do not know 
-     * (e.g. that is not defined by the configured {@link TagSet}).<p>
-     * @param tag the Tag
+     * The {@link LexicalCategory LexicalCategories} applying to this PosTag
+     */
+    private final Set<LexicalCategory> category;
+    /**
+     * The mapped {@link Pos} tags. Empty if none are mapped
+     */
+    private final Set<Pos> pos;
+    /**
+     * NOTE: an empty {@link Set} if {@link #pos} is empty!
+     */
+    private final Set<Pos> posHierarchy;
+//    /**
+//     * Creates a new POS tag for the parsed tag. The created Tag is not
+//     * assigned to any {@link LexicalCategory}.<p> This constructor can be used
+//     * by {@link EnhancementEngine}s that encounter an Tag they do not know 
+//     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+//     * @param tag the Tag
+//     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+//     * or empty.
+//     */
+//    public PosTag(String tag){
+//        this(tag,(LexicalCategory)null);
+//    }
+    /**
+     * Creates a PosTag that is assigned to a {@link LexicalCategory}
+     * @param tag the tag
+     * @param category the lexical categor(ies) mapped to the tag
      * @throws IllegalArgumentException if the parsed tag is <code>null</code>
      * or empty.
      */
-    public PosTag(String tag){
-        this(tag,null);
+    public PosTag(String tag,LexicalCategory...category){
+        super(tag);
+        this.category = EnumSet.noneOf(LexicalCategory.class);
+        if(category != null){
+            this.category.addAll(Arrays.asList(category));
+        }
+        this.pos = Collections.emptySet();
+        this.posHierarchy = Collections.emptySet();
     }
     /**
      * Creates a PosTag that is assigned to a {@link LexicalCategory}
      * @param tag the tag
-     * @param category the lexical category or <code>null</code> if not known
+     * @param pos a concrete {@link Pos} mapped to the string
+     * @param furtherPos allows to add additional {@link Pos} mappings
      * @throws IllegalArgumentException if the parsed tag is <code>null</code>
      * or empty.
      */
-    public PosTag(String tag,LexicalCategory category){
+    public PosTag(String tag,Pos pos,Pos...furtherPos){
+        this(tag, null,pos,furtherPos);
+    }
+    
+    public PosTag(String tag,LexicalCategory category, Pos pos,Pos...furtherPos){
         super(tag);
-        this.category = category;
+        if(pos != null){
+            if(furtherPos == null || furtherPos.length < 1){
+                this.pos = Collections.singleton(pos);
+                this.posHierarchy = pos.hierarchy();
+                if(category == null){
+                    this.category = pos.categories();
+                } else {
+                    this.category = EnumSet.of(category);
+                    this.category.addAll(pos.categories());
+                }
+            } else { // in case of multiple Pos Tags
+                this.pos = EnumSet.of(pos,furtherPos);
+                //we need to collect categories
+                this.category = category == null ? 
+                        EnumSet.noneOf(LexicalCategory.class) :
+                            EnumSet.of(category);
+                //and the union over the pos parents
+                this.posHierarchy = EnumSet.noneOf(Pos.class);
+                for(Pos p : this.pos){
+                    this.posHierarchy.addAll(p.hierarchy());
+                    this.category.addAll(p.categories());
+                }
+            }
+        } else {
+            if(furtherPos != null && furtherPos.length > 0){
+                throw new IllegalArgumentException("furtherPos parameter MUST BE NULL "
+                    + "or empty if the pos parameter is NULL!");
+            }
+            this.category = category == null ? 
+                    Collections.EMPTY_SET : Collections.singleton(category);
+            this.pos = Collections.emptySet();
+            this.posHierarchy = Collections.emptySet();
+        }
     }
     /**
-     * The LecxialCategory of this tag (if known)
-     * @return the category or <code>null</code> if not mapped to any
+     * The {@link LexicalCategory LexicalCategories} of this tag
+     * @return the {@link LexicalCategory LexicalCategories} or an
+     * empty {@link Set} if the string {@link #getTag() tag} is 
+     * not mapped.
      */
-    public LexicalCategory getCategory(){
+    public Set<LexicalCategory> getCategories(){
        return category; 
     }
     
+    /**
+     * Checks if this {@link PosTag} is mapped to the parsed
+     * {@link LexicalCategory}
+     * @param category the category
+     * @return <code>true</code> if this PosTag is mapped to
+     * the parsed category.
+     */
+    public boolean hasCategory(LexicalCategory category){
+        return this.category.contains(category);
+    }
+    
+    /**
+     * Checks if the {@link PosTag} is of the parsed {@link Pos}
+     * tag. This also considers the transitive hierarchy of
+     * the {@link Pos} enum.
+     * @param pos the {@link Pos} to check
+     * @return <code>true</code> if this PosTag is mapped to
+     * the parsed {@link Pos}.
+     */
+    public boolean hasPos(Pos pos){
+        return this.pos.isEmpty() ? false : 
+            posHierarchy.contains(pos);
+    }
+    /**
+     * Returns <code>true</code> if this PosTag is mapped to a
+     * {@link LexicalCategory} or a {@link Pos} type as defined
+     * by the <a href="http://olia.nlp2rdf.org/">Olia</a> Ontology
+     * @return
+     */
+    public boolean isMapped() {
+        return !category.isEmpty();
+    }
+    
+    /**
+     * Getter for the {@link Pos} mapped to this PosTag
+     * @return the {@link Pos} types mapped to the
+     * string {@link #getTag() tag} or an empty set if not
+     * mapped. These are the directly mapped {@link Pos} types
+     * and do not include the parent Pos types.
+     */
+    public Set<Pos> getPos() {
+        return pos;
+    }
+    
+    public Set<Pos> getPosHierarchy(){
+        return posHierarchy;
+    }
+    
     @Override
     public String toString() {
-        return String.format("POS %s (%s)", tag,
-            category == null ? "none" : category.name());
+        StringBuilder sb = new StringBuilder("pos: ");
+        sb.append(tag);
+        if(pos != null || !category.isEmpty()){
+            sb.append('(');
+            if(!pos.isEmpty()){
+                if(pos.size() == 1){
+                    sb.append(pos.iterator().next());//.name());
+                } else {
+                    sb.append(pos);
+                }
+                sb.append('|');
+            }
+            if(category.size() == 1){
+                sb.append(category.iterator().next());//.name());
+            } else {
+                sb.append(category);
+            }
+            sb.append(')');
+        }
+        return sb.toString();
     }
     
     @Override
     public int hashCode() {
-        return tag.hashCode();
+        return tag.hashCode() + category.hashCode() + pos.hashCode();
     }
     
     @Override
     public boolean equals(Object obj) {
         return super.equals(obj) && obj instanceof PosTag &&
-            (category == null && ((PosTag)obj).category == null) ||
-                    (category != null && category.equals(((PosTag)obj).category));
+                category.equals(((PosTag)obj).category) &&
+                pos.equals(((PosTag)obj).pos);
     }
 }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/English.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/English.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/English.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/English.java Mon Oct 29 10:14:03 2012
@@ -3,6 +3,7 @@ package org.apache.stanbol.enhancer.nlp.
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 
 
@@ -27,57 +28,57 @@ public final class English {
         PENN_TREEBANK.getProperties().put("olia.linkingModel", 
             new UriRef("http://purl.org/olia/penn-link.rdf"));
 
-        PENN_TREEBANK.addTag(new PosTag("CC", LexicalCategory.Conjuction));
-        PENN_TREEBANK.addTag(new PosTag("CD",LexicalCategory.Quantifier));
-        PENN_TREEBANK.addTag(new PosTag("DT",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("EX",null)); //existential there
-        PENN_TREEBANK.addTag(new PosTag("FW",LexicalCategory.Noun)); //TODO check
-        PENN_TREEBANK.addTag(new PosTag("IN",LexicalCategory.Adjective));
+        PENN_TREEBANK.addTag(new PosTag("CC", Pos.CoordinatingConjunction));
+        PENN_TREEBANK.addTag(new PosTag("CD",Pos.CardinalNumber));
+        PENN_TREEBANK.addTag(new PosTag("DT",Pos.Determiner));
+        PENN_TREEBANK.addTag(new PosTag("EX",Pos.ExistentialParticle)); //TODO: unsure mapping
+        PENN_TREEBANK.addTag(new PosTag("FW",Pos.Foreign));
+        PENN_TREEBANK.addTag(new PosTag("IN",Pos.Preposition, Pos.SubordinatingConjunction));
         PENN_TREEBANK.addTag(new PosTag("JJ",LexicalCategory.Adjective));
-        PENN_TREEBANK.addTag(new PosTag("JJR",LexicalCategory.Adjective));
-        PENN_TREEBANK.addTag(new PosTag("JJS",LexicalCategory.Adjective));
-        PENN_TREEBANK.addTag(new PosTag("LS",null));
-        PENN_TREEBANK.addTag(new PosTag("MD",LexicalCategory.Noun));
-        PENN_TREEBANK.addTag(new PosTag("NN",LexicalCategory.Noun));
-        PENN_TREEBANK.addTag(new PosTag("NNP",LexicalCategory.Noun));
-        PENN_TREEBANK.addTag(new PosTag("NNPS",LexicalCategory.Noun));
-        PENN_TREEBANK.addTag(new PosTag("NNS",LexicalCategory.Noun));
-        PENN_TREEBANK.addTag(new PosTag("PDT",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("POS",null));
-        PENN_TREEBANK.addTag(new PosTag("PP",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("PP$",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("PRP",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("PRP$",LexicalCategory.PronounOrDeterminer));
+        PENN_TREEBANK.addTag(new PosTag("JJR",LexicalCategory.Adjective, Pos.ComparativeParticle));
+        PENN_TREEBANK.addTag(new PosTag("JJS",LexicalCategory.Adjective, Pos.SuperlativeParticle));
+        PENN_TREEBANK.addTag(new PosTag("LS",Pos.ListMarker));
+        PENN_TREEBANK.addTag(new PosTag("MD",Pos.ModalVerb));
+        PENN_TREEBANK.addTag(new PosTag("NN",Pos.CommonNoun, Pos.SingularQuantifier));
+        PENN_TREEBANK.addTag(new PosTag("NNP",Pos.ProperNoun, Pos.SingularQuantifier));
+        PENN_TREEBANK.addTag(new PosTag("NNPS",Pos.ProperNoun, Pos.PluralQuantifier));
+        PENN_TREEBANK.addTag(new PosTag("NNS",Pos.CommonNoun, Pos.PluralQuantifier));
+        PENN_TREEBANK.addTag(new PosTag("PDT",Pos.Determiner)); //TODO should be Pre-Determiner
+        PENN_TREEBANK.addTag(new PosTag("POS")); //TODO: map Possessive Ending (e.g., Nouns ending in 's)
+        PENN_TREEBANK.addTag(new PosTag("PP",Pos.PersonalPronoun));
+        PENN_TREEBANK.addTag(new PosTag("PP$",Pos.PossessivePronoun));
+        PENN_TREEBANK.addTag(new PosTag("PRP",Pos.PersonalPronoun));
+        PENN_TREEBANK.addTag(new PosTag("PRP$",Pos.PossessivePronoun));
         PENN_TREEBANK.addTag(new PosTag("RB",LexicalCategory.Adverb));
-        PENN_TREEBANK.addTag(new PosTag("RBR",LexicalCategory.Adverb));
-        PENN_TREEBANK.addTag(new PosTag("RBS",LexicalCategory.Adverb));
-        PENN_TREEBANK.addTag(new PosTag("RP",null));
-        PENN_TREEBANK.addTag(new PosTag("SYM",LexicalCategory.Residual));
+        PENN_TREEBANK.addTag(new PosTag("RBR",LexicalCategory.Adverb,Pos.ComparativeParticle));
+        PENN_TREEBANK.addTag(new PosTag("RBS",LexicalCategory.Adverb,Pos.SuperlativeParticle));
+        PENN_TREEBANK.addTag(new PosTag("RP",Pos.Participle));
+        PENN_TREEBANK.addTag(new PosTag("SYM",Pos.Symbol));
         PENN_TREEBANK.addTag(new PosTag("TO",LexicalCategory.Adposition));
         PENN_TREEBANK.addTag(new PosTag("UH",LexicalCategory.Interjection));
-        PENN_TREEBANK.addTag(new PosTag("VB",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("VBD",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("VBG",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("VBN",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("VBP",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("VBZ",LexicalCategory.Verb));
-        PENN_TREEBANK.addTag(new PosTag("WDT",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("WP",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("WP$",LexicalCategory.PronounOrDeterminer));
-        PENN_TREEBANK.addTag(new PosTag("WRB",LexicalCategory.Adverb));
-        PENN_TREEBANK.addTag(new PosTag("´´",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag(":",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag(",",LexicalCategory.Punctuation));
+        PENN_TREEBANK.addTag(new PosTag("VB",Pos.Infinitive)); //TODO check a Verb in the base form should be Pos.Infinitive
+        PENN_TREEBANK.addTag(new PosTag("VBD",Pos.PastParticiple)); //TODO check
+        PENN_TREEBANK.addTag(new PosTag("VBG",Pos.PresentParticiple,Pos.Gerund));
+        PENN_TREEBANK.addTag(new PosTag("VBN",Pos.PastParticiple));
+        PENN_TREEBANK.addTag(new PosTag("VBP",Pos.PresentParticiple));
+        PENN_TREEBANK.addTag(new PosTag("VBZ",Pos.PresentParticiple));
+        PENN_TREEBANK.addTag(new PosTag("WDT",Pos.WHDeterminer));
+        PENN_TREEBANK.addTag(new PosTag("WP",Pos.WHPronoun));
+        PENN_TREEBANK.addTag(new PosTag("WP$",Pos.PossessivePronoun, Pos.WHPronoun));
+        PENN_TREEBANK.addTag(new PosTag("WRB",Pos.WHTypeAdverbs));
+        PENN_TREEBANK.addTag(new PosTag("´´",Pos.CloseQuote));
+        PENN_TREEBANK.addTag(new PosTag(":",Pos.Colon));
+        PENN_TREEBANK.addTag(new PosTag(",",Pos.Comma));
         PENN_TREEBANK.addTag(new PosTag("$",LexicalCategory.Residual));
-        PENN_TREEBANK.addTag(new PosTag("\"",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("``",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag(".",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("{",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("}",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("[",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("]",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag("(",LexicalCategory.Punctuation));
-        PENN_TREEBANK.addTag(new PosTag(")",LexicalCategory.Punctuation));
+        PENN_TREEBANK.addTag(new PosTag("\"",Pos.Quote));
+        PENN_TREEBANK.addTag(new PosTag("``",Pos.OpenQuote));
+        PENN_TREEBANK.addTag(new PosTag(".",Pos.Point));
+        PENN_TREEBANK.addTag(new PosTag("{",Pos.OpenCurlyBracket));
+        PENN_TREEBANK.addTag(new PosTag("}",Pos.CloseCurlyBracket));
+        PENN_TREEBANK.addTag(new PosTag("[",Pos.OpenSquareBracket));
+        PENN_TREEBANK.addTag(new PosTag("]",Pos.CloseSquareBracket));
+        PENN_TREEBANK.addTag(new PosTag("(",Pos.OpenParenthesis));
+        PENN_TREEBANK.addTag(new PosTag(")",Pos.CloseParenthesis));
     }
     
 }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/German.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/German.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/German.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/German.java Mon Oct 29 10:14:03 2012
@@ -3,6 +3,7 @@ package org.apache.stanbol.enhancer.nlp.
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 
 /**
@@ -26,63 +27,63 @@ public final class German {
             new UriRef("http://purl.org/olia/stts.owl"));
         STTS.getProperties().put("olia.linkingModel", 
             new UriRef("http://purl.org/olia/stts-link.rdf"));
-        STTS.addTag(new PosTag("ADJA", LexicalCategory.Adjective));
-        STTS.addTag(new PosTag("ADJD", LexicalCategory.Adjective));
+        STTS.addTag(new PosTag("ADJA", Pos.AttributiveAdjective));
+        STTS.addTag(new PosTag("ADJD", Pos.PredicativeAdjective));
         STTS.addTag(new PosTag("ADV", LexicalCategory.Adverb));
-        STTS.addTag(new PosTag("APPR", LexicalCategory.Adposition));
-        STTS.addTag(new PosTag("APPRART", LexicalCategory.Adposition));
-        STTS.addTag(new PosTag("APPO", LexicalCategory.Adposition));
-        STTS.addTag(new PosTag("APZR", LexicalCategory.Adposition));
-        STTS.addTag(new PosTag("ART", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("CARD", LexicalCategory.Quantifier));
-        STTS.addTag(new PosTag("FM", LexicalCategory.Noun));
+        STTS.addTag(new PosTag("APPR", Pos.Preposition));
+        STTS.addTag(new PosTag("APPRART", Pos.FusedPrepArt));
+        STTS.addTag(new PosTag("APPO", Pos.Postposition));
+        STTS.addTag(new PosTag("APZR", Pos.Circumposition));
+        STTS.addTag(new PosTag("ART", Pos.Article));
+        STTS.addTag(new PosTag("CARD", Pos.CardinalNumber));
+        STTS.addTag(new PosTag("FM", Pos.Foreign));
         STTS.addTag(new PosTag("ITJ", LexicalCategory.Interjection));
-        STTS.addTag(new PosTag("KOUI", LexicalCategory.Conjuction));
-        STTS.addTag(new PosTag("KOUS", LexicalCategory.Conjuction));
-        STTS.addTag(new PosTag("KON", LexicalCategory.Conjuction));
-        STTS.addTag(new PosTag("KOKOM", LexicalCategory.Conjuction));
-        STTS.addTag(new PosTag("NN", LexicalCategory.Noun));
-        STTS.addTag(new PosTag("NE", LexicalCategory.Noun));
-        STTS.addTag(new PosTag("PDS", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PDAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PIS", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PIAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PIDAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PPER", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PPOSS", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PPOSAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PRELS", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PRELAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PRF", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PWS", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PWAT", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PWAV", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PAV", LexicalCategory.PronounOrDeterminer));
+        STTS.addTag(new PosTag("KOUI", Pos.SubordinatingConjunction));
+        STTS.addTag(new PosTag("KOUS", Pos.SubordinatingConjunctionWithFiniteClause));
+        STTS.addTag(new PosTag("KON", Pos.CoordinatingConjunction));
+        STTS.addTag(new PosTag("KOKOM", Pos.ComparativeParticle));
+        STTS.addTag(new PosTag("NN", Pos.CommonNoun));
+        STTS.addTag(new PosTag("NE", Pos.ProperNoun));
+        STTS.addTag(new PosTag("PDS", Pos.DemonstrativePronoun,Pos.SubstitutivePronoun));
+        STTS.addTag(new PosTag("PDAT", Pos.DemonstrativePronoun, Pos.AttributivePronoun));
+        STTS.addTag(new PosTag("PIS", Pos.SubstitutivePronoun, Pos.IndefinitePronoun));
+        STTS.addTag(new PosTag("PIAT",  Pos.AttributivePronoun, Pos.IndefinitePronoun));
+        STTS.addTag(new PosTag("PIDAT", Pos.AttributivePronoun, Pos.IndefinitePronoun));
+        STTS.addTag(new PosTag("PPER", Pos.PersonalPronoun));
+        STTS.addTag(new PosTag("PPOSS", Pos.SubstitutivePronoun, Pos.PossessivePronoun));
+        STTS.addTag(new PosTag("PPOSAT", Pos.AttributivePronoun, Pos.PossessivePronoun));
+        STTS.addTag(new PosTag("PRELS", Pos.SubstitutivePronoun, Pos.RelativePronoun));
+        STTS.addTag(new PosTag("PRELAT", Pos.AttributivePronoun, Pos.RelativePronoun));
+        STTS.addTag(new PosTag("PRF", Pos.ReflexivePronoun));
+        STTS.addTag(new PosTag("PWS", Pos.SubstitutivePronoun, Pos.InterrogativePronoun));
+        STTS.addTag(new PosTag("PWAT", Pos.AttributivePronoun, Pos.InterrogativePronoun));
+        STTS.addTag(new PosTag("PWAV", LexicalCategory.Adverb, Pos.RelativePronoun, Pos.InterrogativePronoun));
+        STTS.addTag(new PosTag("PAV", Pos.PronominalAdverb));
         //Tiger-STTS for PAV
-        STTS.addTag(new PosTag("PROAV", LexicalCategory.PronounOrDeterminer));
-        STTS.addTag(new PosTag("PTKA", LexicalCategory.Unique));
-        STTS.addTag(new PosTag("PTKANT", LexicalCategory.Unique));
-        STTS.addTag(new PosTag("PTKNEG", LexicalCategory.Unique));
-        STTS.addTag(new PosTag("PTKVZ", LexicalCategory.Unique));
-        STTS.addTag(new PosTag("PTKZU", LexicalCategory.Unique)); //particle "zu"  e.g. "zu [gehen]".
-        STTS.addTag(new PosTag("TRUNC", null)); //e.g. An- [und Abreise] 
-        STTS.addTag(new PosTag("VVIMP", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VVINF", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VVFIN", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VVIZU", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VVPP", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VAFIN", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VAIMP", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VAINF", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VAPP", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VMFIN", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VMINF", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("VMPP", LexicalCategory.Verb));
-        STTS.addTag(new PosTag("XY", null)); //non words (e.g. H20, 3:7 ...)
-        STTS.addTag(new PosTag("$.", LexicalCategory.Punctuation));
-        STTS.addTag(new PosTag("$,", LexicalCategory.Punctuation));
-        STTS.addTag(new PosTag("$(", LexicalCategory.Punctuation));
+        STTS.addTag(new PosTag("PROAV", Pos.PronominalAdverb));
+        STTS.addTag(new PosTag("PTKA", Pos.AdjectivalParticle));
+        STTS.addTag(new PosTag("PTKANT", Pos.Particle));
+        STTS.addTag(new PosTag("PTKNEG", Pos.NegativeParticle));
+        STTS.addTag(new PosTag("PTKVZ", Pos.VerbalParticle));
+        STTS.addTag(new PosTag("PTKZU", Pos.Particle)); //particle "zu"  e.g. "zu [gehen]".
+        STTS.addTag(new PosTag("TRUNC", Pos.Abbreviation)); //e.g. An- [und Abreise] 
+        STTS.addTag(new PosTag("VVIMP", Pos.ImperativeVerb));
+        STTS.addTag(new PosTag("VVINF", Pos.Infinitive));
+        STTS.addTag(new PosTag("VVFIN", Pos.FiniteVerb));
+        STTS.addTag(new PosTag("VVIZU", Pos.Infinitive));
+        STTS.addTag(new PosTag("VVPP", Pos.PastParticiple));
+        STTS.addTag(new PosTag("VAFIN", Pos.FiniteVerb, Pos.AuxiliaryVerb));
+        STTS.addTag(new PosTag("VAIMP", Pos.AuxiliaryVerb, Pos.ImperativeVerb));
+        STTS.addTag(new PosTag("VAINF", Pos.AuxiliaryVerb, Pos.Infinitive));
+        STTS.addTag(new PosTag("VAPP", Pos.PastParticiple, Pos.AuxiliaryVerb));
+        STTS.addTag(new PosTag("VMFIN", Pos.FiniteVerb, Pos.ModalVerb));
+        STTS.addTag(new PosTag("VMINF", Pos.Infinitive, Pos.ModalVerb));
+        STTS.addTag(new PosTag("VMPP", Pos.PastParticiple, Pos.ModalVerb));
+        STTS.addTag(new PosTag("XY")); //non words (e.g. H20, 3:7 ...)
+        STTS.addTag(new PosTag("$.", Pos.Point));
+        STTS.addTag(new PosTag("$,", Pos.Comma));
+        STTS.addTag(new PosTag("$(", Pos.ParentheticalPunctuation));
         //Normal nouns in named entities (not in stts 1999)
-        STTS.addTag(new PosTag("NNE", LexicalCategory.Noun));
+        STTS.addTag(new PosTag("NNE", Pos.ProperNoun)); //TODO maybe map to common noun
     }
 }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/Spanish.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/Spanish.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/Spanish.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/olia/Spanish.java Mon Oct 29 10:14:03 2012
@@ -3,6 +3,7 @@ package org.apache.stanbol.enhancer.nlp.
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 
 public class Spanish {
@@ -25,70 +26,70 @@ public class Spanish {
 //        PAROLE.getProperties().put("olia.linkingModel", 
 //            new UriRef("http://purl.org/olia/???"));
         PAROLE.addTag(new PosTag("AO", LexicalCategory.Adjective));
-        PAROLE.addTag(new PosTag("AQ", LexicalCategory.Adjective));
-        PAROLE.addTag(new PosTag("CC", LexicalCategory.Conjuction));
-        PAROLE.addTag(new PosTag("CS", LexicalCategory.Conjuction));
-        PAROLE.addTag(new PosTag("DA", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DD", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DE", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DI", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DN", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DP", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("DT", LexicalCategory.PronounOrDeterminer));
+        PAROLE.addTag(new PosTag("AQ", Pos.QualifierAdjective));
+        PAROLE.addTag(new PosTag("CC", Pos.CoordinatingConjunction));
+        PAROLE.addTag(new PosTag("CS", Pos.SubordinatingConjunction));
+        PAROLE.addTag(new PosTag("DA", Pos.Article));
+        PAROLE.addTag(new PosTag("DD", Pos.DemonstrativeDeterminer));
+        PAROLE.addTag(new PosTag("DE", Pos.ExclamatoryDeterminer));
+        PAROLE.addTag(new PosTag("DI", Pos.IndefiniteDeterminer));
+        PAROLE.addTag(new PosTag("DN", Pos.Numeral,Pos.Determiner));
+        PAROLE.addTag(new PosTag("DP", Pos.PossessiveDeterminer));
+        PAROLE.addTag(new PosTag("DT", Pos.InterrogativeDeterminer));
         PAROLE.addTag(new PosTag("Faa", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fat", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fc", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fd", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fe", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fg", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fh", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fia", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fit", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fp", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fpa", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fpt", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fs", LexicalCategory.Punctuation));
-        PAROLE.addTag(new PosTag("Fx", LexicalCategory.Punctuation));
+        PAROLE.addTag(new PosTag("Fat", Pos.ExclamativePoint));
+        PAROLE.addTag(new PosTag("Fc", Pos.Comma));
+        PAROLE.addTag(new PosTag("Fd", Pos.Colon));
+        PAROLE.addTag(new PosTag("Fe", Pos.Quote));
+        PAROLE.addTag(new PosTag("Fg", Pos.Hyphen));
+        PAROLE.addTag(new PosTag("Fh", Pos.Slash));
+        PAROLE.addTag(new PosTag("Fia", Pos.InvertedQuestionMark));
+        PAROLE.addTag(new PosTag("Fit", Pos.QuestionMark));
+        PAROLE.addTag(new PosTag("Fp", Pos.Point));
+        PAROLE.addTag(new PosTag("Fpa", Pos.OpenParenthesis));
+        PAROLE.addTag(new PosTag("Fpt", Pos.CloseParenthesis));
+        PAROLE.addTag(new PosTag("Fs", Pos.SuspensionPoints));
+        PAROLE.addTag(new PosTag("Fx", Pos.SemiColon));
         PAROLE.addTag(new PosTag("Fz", LexicalCategory.Punctuation));
         PAROLE.addTag(new PosTag("I", LexicalCategory.Interjection));
-        PAROLE.addTag(new PosTag("NC", LexicalCategory.Noun));
-        PAROLE.addTag(new PosTag("NP", LexicalCategory.Noun));
-        PAROLE.addTag(new PosTag("P0", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PD", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PE", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PI", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PN", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PP", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PR", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PT", LexicalCategory.PronounOrDeterminer));
-        PAROLE.addTag(new PosTag("PX", LexicalCategory.PronounOrDeterminer));
+        PAROLE.addTag(new PosTag("NC", Pos.CommonNoun));
+        PAROLE.addTag(new PosTag("NP", Pos.ProperNoun));
+        PAROLE.addTag(new PosTag("P0", Pos.Pronoun)); //TODO: CliticPronoun is missing
+        PAROLE.addTag(new PosTag("PD", Pos.DemonstrativePronoun));
+        PAROLE.addTag(new PosTag("PE", Pos.ExclamatoryPronoun));
+        PAROLE.addTag(new PosTag("PI", Pos.IndefinitePronoun));
+        PAROLE.addTag(new PosTag("PN", Pos.Pronoun)); //TODO: NumeralPronoun is missing
+        PAROLE.addTag(new PosTag("PP", Pos.PersonalPronoun));
+        PAROLE.addTag(new PosTag("PR", Pos.RelativePronoun));
+        PAROLE.addTag(new PosTag("PT", Pos.InterrogativePronoun));
+        PAROLE.addTag(new PosTag("PX", Pos.PossessivePronoun));
         PAROLE.addTag(new PosTag("RG", LexicalCategory.Adverb));
-        PAROLE.addTag(new PosTag("RN", LexicalCategory.Adverb));
-        PAROLE.addTag(new PosTag("SP", LexicalCategory.Adposition));
-        PAROLE.addTag(new PosTag("VAG", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VAI", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VAM", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VAN", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VAP", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VAS", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMG", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMI", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMM", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMN", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMP", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VMS", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSG", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSI", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSM", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSN", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSP", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("VSS", LexicalCategory.Verb));
-        PAROLE.addTag(new PosTag("W", LexicalCategory.Quantifier)); //date times
-        PAROLE.addTag(new PosTag("X", null)); //unknown
-        PAROLE.addTag(new PosTag("Y", null)); //abbreviation
-        PAROLE.addTag(new PosTag("Z", null)); //Figures
-        PAROLE.addTag(new PosTag("Zm", LexicalCategory.Quantifier)); //currency
-        PAROLE.addTag(new PosTag("Zp", LexicalCategory.Quantifier)); //percentage
+        PAROLE.addTag(new PosTag("RN", Pos.NegativeAdverb));
+        PAROLE.addTag(new PosTag("SP", Pos.Preposition));
+        PAROLE.addTag(new PosTag("VAG", Pos.StrictAuxiliaryVerb, Pos.Gerund));
+        PAROLE.addTag(new PosTag("VAI", Pos.StrictAuxiliaryVerb, Pos.IndicativeVerb));
+        PAROLE.addTag(new PosTag("VAM", Pos.StrictAuxiliaryVerb, Pos.ImperativeVerb));
+        PAROLE.addTag(new PosTag("VAN", Pos.StrictAuxiliaryVerb, Pos.Infinitive));
+        PAROLE.addTag(new PosTag("VAP", Pos.StrictAuxiliaryVerb, Pos.Participle));
+        PAROLE.addTag(new PosTag("VAS", Pos.StrictAuxiliaryVerb, Pos.SubjunctiveVerb));
+        PAROLE.addTag(new PosTag("VMG", Pos.MainVerb, Pos.Gerund));
+        PAROLE.addTag(new PosTag("VMI", Pos.MainVerb, Pos.IndicativeVerb));
+        PAROLE.addTag(new PosTag("VMM", Pos.MainVerb, Pos.ImperativeVerb));
+        PAROLE.addTag(new PosTag("VMN", Pos.MainVerb, Pos.Infinitive));
+        PAROLE.addTag(new PosTag("VMP", Pos.MainVerb, Pos.Participle));
+        PAROLE.addTag(new PosTag("VMS", Pos.MainVerb, Pos.SubjunctiveVerb));
+        PAROLE.addTag(new PosTag("VSG", Pos.ModalVerb, Pos.Gerund));
+        PAROLE.addTag(new PosTag("VSI", Pos.ModalVerb, Pos.IndicativeVerb));
+        PAROLE.addTag(new PosTag("VSM", Pos.ModalVerb, Pos.ImperativeVerb));
+        PAROLE.addTag(new PosTag("VSN", Pos.ModalVerb, Pos.Infinitive));
+        PAROLE.addTag(new PosTag("VSP", Pos.ModalVerb, Pos.Participle));
+        PAROLE.addTag(new PosTag("VSS", Pos.ModalVerb, Pos.SubjunctiveVerb));
+        PAROLE.addTag(new PosTag("W", Pos.Date)); //date times
+        PAROLE.addTag(new PosTag("X")); //unknown
+        PAROLE.addTag(new PosTag("Y", Pos.Abbreviation)); //abbreviation
+        PAROLE.addTag(new PosTag("Z", Pos.Image)); //Figures
+        PAROLE.addTag(new PosTag("Zm", Pos.Symbol)); //currency
+        PAROLE.addTag(new PosTag("Zp", Pos.Symbol)); //percentage
         
         
     }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/LanguageConfiguration.java Mon Oct 29 10:14:03 2012
@@ -9,20 +9,60 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
+import org.osgi.framework.ServiceReference;
 import org.osgi.service.cm.ConfigurationException;
 
 /**
- * Utility that supports the configuration of languages in the form of
+ * Utility that supports the configuration of languages and language
+ * specific parameters.
+ * <h3>Language configuration</h3>
+ * Languages are configured as follows:
  * <pre>
- *     de,en
- * </pre>
- * of
+ *     de,en </pre>
+ * or 
  * <pre>
- *     !fr,!cn,*
- * </pre>
+ *     !fr,!cn,*</pre>
+ * The '<code>!{lang}</code>' is used to {@link #getExplicitlyExcluded() 
+ * explicitly exclude} a language. '<code>*</code>' can be used to
+ * specify that all languages are allowed. '<code>{lang}</code>'
+ * {@link #getExplicitlyIncluded() explicitly includes} a language.
+ * '<code>,</code>' is used as separator between multiple configurations;
+ * however, this class also supports the usage of <code>String[]</code> and
+ * {@link Collection} (in case of Collections the
+ * {@link Object#toString()} method is used to obtain the configuration).
+ * If an array or a collection is used for the configuration, then comma
+ * is NOT used as separator!
  * <p>
- * Instead of comma separated Strings also <code>String[]</code> and
- * {@link Collection} are supported.
+ * <h3>Parameter Support</h3>
+ * This class supports the parsing of language specific parameters by
+ * the following syntax
+ * <pre>
+ *    {language};{param-name}={param-value};{param-name}={param-value}</pre>
+ * Parameters that apply to all {languages} with no configuration can be
+ * either set for the '<code>*</code>' or an empty language tag. Here
+ * is an example
+ * <pre>
+ *     *;myParam=myValue
+ *     ;myParam=myValue</pre>
+ * Multiple default configurations will cause a {@link ConfigurationException}.
+ * <p>
+ * The {@link #getParameters(String)} and {@link #getParameters(String,String)}
+ * will return values of the {@link #getDefaultParameters()} if no
+ * language specific parameters are present for the requested language. However
+ * the default configuration is not merged but replaced by language specific
+ * parameter declarations. Applications that want to use the default configuration
+ * as fallback to language specific settings can implement this by
+ * using the properties provided by {@link #getDefaultParameters()}.
+ * <p>
+ * <b>Notes</b> <ul>
+ * <li>only the first occurrence of '<code>=</code>' within a
+ * parameter is used as separator between the param name and value. This
+ * means that the {param-value} is allowed to contain '='.
+ * <li>in case a comma separated string is used for the language
+ * configuration, parameter declarations MUST NOT contain 
+ * '<code>,</code>' (comma) characters. In case a <code>String[]</code> or a
+ * {@link Collection} is used this is not the case.
+ * </ul>
  *
  * @author Rupert Westenthaler
  *
@@ -37,7 +77,7 @@ public class LanguageConfiguration {
     private Map<String,Map<String,String>> configuredLanguages = new HashMap<String,Map<String,String>>();
     private Set<String> excludedLanguages = new HashSet<String>();
     private boolean allowAll;
-    
+    private Map<String,String> defaultParameters = EMPTY_PARAMS;
     @SuppressWarnings("unchecked")
     public LanguageConfiguration(String property, String[] defaultConfig){
         if(property == null || property.isEmpty()){
@@ -60,12 +100,40 @@ public class LanguageConfiguration {
     
     /**
      * Reads the config for the configured {@link #getProperty() property}
-     * from the parsed configuration. This supports <code>String[]</code>,
-     * <code>Collection</code>, and comma separated Strings
-     * @param configuration
+     * from the parsed configuration. <p>
+     * This implementation supports
+     * <code>null</code> (sets the default), <code>String[]</code>,
+     * <code>Collection&lt;?&gt;</code> ({@link Object#toString() toString()} is called
+     * on members) and comma separated {@link String}.
+     * @param configuration the configuration
      */
     public void setConfiguration(Dictionary<?,?> configuration) throws ConfigurationException {
-        Object value = configuration.get(property);
+        processConfiguration(configuration.get(property));
+    }
+    /**
+     * Reads the configuration for the configured {@link #getProperty()} from
+     * the properties of the parsed {@link ServiceReference}.<p>
+     * This implementation supports
+     * <code>null</code> (sets the default), <code>String[]</code>,
+     * <code>Collection&lt;?&gt;</code> ({@link Object#toString() toString()} is called
+     * on members) and comma separated {@link String}.
+     * @param ref the ServiceReference
+     * @throws ConfigurationException
+     */
+    public void setConfiguration(ServiceReference ref) throws ConfigurationException {
+        processConfiguration(ref.getProperty(property));
+    }
+    
+    /**
+     * Reads the configuration for the parsed value. <p>
+     * This implementation supports
+     * <code>null</code> (sets the default), <code>String[]</code>,
+     * <code>Collection&lt;?&gt;</code> ({@link Object#toString() toString()} is called
+     * on members) and comma separated {@link String}.
+     * @param value the value
+     * @throws ConfigurationException if the configuration is invalid
+     */
+    protected void processConfiguration(Object value) throws ConfigurationException {
         Collection<?> config;
         if(value == null){
             config = defaultConfig;
@@ -88,6 +156,10 @@ public class LanguageConfiguration {
         if(config == null){
             config = defaultConfig;
         }
+        //reset values
+        configuredLanguages.clear();
+        excludedLanguages.clear();
+        defaultParameters = EMPTY_PARAMS; //do not change values in multi threaded environments
         for(Object value : config) {
             if(value == null){
                 continue; //ignore null values
@@ -106,6 +178,7 @@ public class LanguageConfiguration {
                 excludedLanguages.add(lang);
             } else if("*".equals(lang)){
                 allowAll = true;
+                parsedDefaultParameters(line, sepIndex);
             } else if(!lang.isEmpty()){
                 if(excludedLanguages.contains(lang)){
                     throw new ConfigurationException(property, 
@@ -118,10 +191,32 @@ public class LanguageConfiguration {
                 configuredLanguages.put(lang,sepIndex >= 0 && sepIndex < line.length()-2 ? 
                         parseParameters(line.substring(sepIndex, line.length()).trim()) :
                             EMPTY_PARAMS);
+            } else { //language tag is empty (line starts with a ';')
+                //this indicates that this is used to configure the default parameters
+                parsedDefaultParameters(line, sepIndex);
             }
         }
     }
     /**
+     * Parses the {@link #defaultParameters} and also checks that at most one
+     * (non-empty) default configuration is present
+     * @param line the current line
+     * @param sepIndex the index of first ';' in the configuration line
+     * @throws ConfigurationException if multiple default configurations are present or
+     * if the parameters are illegally formatted.
+     */
+    private void parsedDefaultParameters(String line, int sepIndex) throws ConfigurationException {
+        if(!defaultParameters.isEmpty()){
+            throw new ConfigurationException(property, "Language Configuration MUST NOT "
+                + "contain multiple default property configurations. This are configurations "
+                + "of properties for the wildcard '*;{properties}' or the empty language "
+                + "';{properties}'.");
+        }
+        defaultParameters = sepIndex >= 0 && sepIndex < line.length()-2 ? 
+                parseParameters(line.substring(sepIndex, line.length()).trim()) :
+                    EMPTY_PARAMS;
+    }
+    /**
      * Parses optional parameters <code>{key}[={value}];{key2}[={value2}]</code>. Using
      * the same key multiple times will override the previouse value
      * @param paramString
@@ -164,13 +259,54 @@ public class LanguageConfiguration {
                 configuredLanguages.containsKey(language);
     }
     /**
+     * The explicitly configured languages
+     * @return
+     */
+    public Set<String> getExplicitlyIncluded(){
+        return configuredLanguages.keySet();
+    }
+    /**
+     * The explicitly excluded (e.g. !de) languages
+     * @return
+     */
+    public Set<String> getExplicitlyExcluded(){
+        return excludedLanguages;
+    }
+    /**
+     * Whether the '*' wildcard was used in the configuration to allow
+     * all languages. 
+     * @return
+     */
+    public boolean useWildcard(){
+        return allowAll;
+    }
+    
+    /**
      * Returns parsed parameters if <code>{@link #isLanguage(String)} == true</code>
      * @param language the language
      * @return the parameters or <code>null</code> if none or the parsed language
      * is not active.
      */
     public Map<String,String> getParameters(String language){
-        return isLanguage(language) ? configuredLanguages.get(language) : null;
+        if(isLanguage(language)){
+            Map<String,String> params = configuredLanguages.get(language);
+            if((params == null || params.isEmpty()) && //if no or empty parameters
+                    !defaultParameters.isEmpty()){ //and there are defaults
+                params = defaultParameters;
+            } else if(params == null){ //do not return NULL
+                params = EMPTY_PARAMS;
+            }
+            return params;
+        } else {
+            return null; //to indicate the parsed language is not active
+        }
+    }
+    /**
+     * Getter for the default parameters
+     * @return the default parameters, an empty map if none.
+     */
+    public Map<String,String> getDefaultParameters() {
+        return defaultParameters;
     }
     
     /**
@@ -179,7 +315,7 @@ public class LanguageConfiguration {
     public void setDefault() {
         try {
             parseConfiguration(defaultConfig);
-        }catch (ConfigurationException e) {
+        } catch (ConfigurationException e) {
             // can not happen else the default config is already validated
             // within the constructor
         }

Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java?rev=1403223&r1=1403222&r2=1403223&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/utils/NIFHelper.java Mon Oct 29 10:14:03 2012
@@ -33,6 +33,7 @@ import org.apache.stanbol.enhancer.nlp.n
 import org.apache.stanbol.enhancer.nlp.nif.StringOntology;
 import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
@@ -238,9 +239,15 @@ public final class NIFHelper {
     public static void writePos(MGraph graph, Annotated annotated, UriRef segmentUri) {
         Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
         if(posTag != null){
-            if(posTag.value().getCategory() != null){
-                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), 
-                    posTag.value().getCategory().getUri()));
+            if(posTag.value().isMapped()){
+                for(Pos pos : posTag.value().getPos()){
+                    graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), 
+                        pos.getUri()));
+                }
+                for(LexicalCategory cat : posTag.value().getCategories()){
+                    graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), 
+                        cat.getUri()));
+                }
             }
             graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(), 
                 lf.createTypedLiteral(posTag.value().getTag())));