You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/12/05 18:26:02 UTC

svn commit: r1548212 - in /stanbol/trunk/enhancement-engines/opennlp: opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/ opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/

Author: rwesten
Date: Thu Dec  5 17:26:01 2013
New Revision: 1548212

URL: http://svn.apache.org/r1548212
Log:
STANBOL-1231: added POS Tag and Phrase Tag mapping for the Treebank+ Tagset in the opennlp-pos and opennlp-chunker engine.

Modified:
    stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java
    stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java

Modified: stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java?rev=1548212&r1=1548211&r2=1548212&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java (original)
+++ stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java Thu Dec  5 17:26:01 2013
@@ -24,6 +24,7 @@ import opennlp.tools.chunker.Chunker;
 import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
 import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
 
 /**
  * Registry for {@link PhraseTag} {@link TagSet}s used by OpenNLP
@@ -75,4 +76,22 @@ public class PhraseTagSetRegistry {
         DEFAULT.addTag(new PhraseTag("PP", LexicalCategory.PronounOrDeterminer));
         getInstance().add(DEFAULT);
     }
+
+    public static final TagSet<PhraseTag> FRENCH = new TagSet<PhraseTag>(
+            "French Treebank+ Phrase TagSet", "fr");
+    
+    static {
+        FRENCH.addTag(new PhraseTag("AP", LexicalCategory.Adjective));
+        FRENCH.addTag(new PhraseTag("AdP",LexicalCategory.Adverb));
+        FRENCH.addTag(new PhraseTag("COORD",LexicalCategory.Conjuction));
+        FRENCH.addTag(new PhraseTag("NP",LexicalCategory.Noun));
+        FRENCH.addTag(new PhraseTag("PP", LexicalCategory.PronounOrDeterminer));
+        FRENCH.addTag(new PhraseTag("VN",LexicalCategory.Verb));
+        FRENCH.addTag(new PhraseTag("VPinf",LexicalCategory.Verb));
+        FRENCH.addTag(new PhraseTag("VPpart",LexicalCategory.Verb));
+        FRENCH.addTag(new PhraseTag("Ssub"));
+        FRENCH.addTag(new PhraseTag("Srel"));
+        FRENCH.addTag(new PhraseTag("Sint"));
+        getInstance().add(FRENCH);
+    }
 }

Modified: stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java?rev=1548212&r1=1548211&r2=1548212&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java (original)
+++ stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java Thu Dec  5 17:26:01 2013
@@ -228,11 +228,11 @@ public final class PosTagSetRegistry {
         SWEDISH.addTag(new PosTag("HV",LexicalCategory.Verb)); //"ha(va)" (have)
         SWEDISH.addTag(new PosTag("WV",LexicalCategory.Verb)); //"vilja" (want)
         SWEDISH.addTag(new PosTag("QV",LexicalCategory.Verb)); //"kunna" (can)
-        SWEDISH.addTag(new PosTag("MV",LexicalCategory.Verb)); //"måste" (must)
+        SWEDISH.addTag(new PosTag("MV",LexicalCategory.Verb)); //"måste" (must)
         SWEDISH.addTag(new PosTag("KV",LexicalCategory.Verb)); // locution "komma att" (periphrastic future)
         SWEDISH.addTag(new PosTag("SV",LexicalCategory.Verb)); //"skola" (will, shall)
-        SWEDISH.addTag(new PosTag("GV",LexicalCategory.Verb)); //"göra" (do, make)
-        SWEDISH.addTag(new PosTag("FV",LexicalCategory.Verb)); //få" (get)
+        SWEDISH.addTag(new PosTag("GV",LexicalCategory.Verb)); //"göra" (do, make)
+        SWEDISH.addTag(new PosTag("FV",LexicalCategory.Verb)); //få" (get)
         SWEDISH.addTag(new PosTag("VV",LexicalCategory.Verb)); //all other verbs
         SWEDISH.addTag(new PosTag("TP",Pos.PastParticiple)); //PerfectParticle
         SWEDISH.addTag(new PosTag("SP",Pos.PresentParticiple));
@@ -257,5 +257,56 @@ public final class PosTagSetRegistry {
         SWEDISH.addTag(new PosTag("XX"));
         getInstance().add(SWEDISH);
     }
-
+    /**
+     * POS tags used by the French Treebank as described in 
+     * <a href="http://alpage.inria.fr/statgram/frdep/Publications/crabbecandi-taln2008-final.pdf">
+     * Expériences d’analyse syntaxique statistique du français</a> page 8.<p>
+     * Note that this Tagset was originally introduced by Crabbé & Candito, 2008
+     * but the linked paper contains a nice tabular overview of it.
+     */
+    public static final TagSet<PosTag> FRENCH = new TagSet<PosTag>("Treebank+ French","fr");
+    
+    static {
+        //Cat C
+        FRENCH.addTag(new PosTag("CS",Pos.SubordinatingConjunction));
+        FRENCH.addTag(new PosTag("CC",Pos.CoordinatingConjunction));
+        //Cat CL
+        FRENCH.addTag(new PosTag("CLO", Pos.PersonalPronoun)); //Clitic
+        FRENCH.addTag(new PosTag("CLS", Pos.PersonalPronoun)); //Clitic
+        FRENCH.addTag(new PosTag("CLR", Pos.PersonalPronoun)); //Clitic
+        //Cat P
+        FRENCH.addTag(new PosTag("P",Pos.Preposition));
+        FRENCH.addTag(new PosTag("P+D")); //no cat
+        FRENCH.addTag(new PosTag("P+PRO")); //no cat
+        //Cat I
+        FRENCH.addTag(new PosTag("I", LexicalCategory.Interjection)); //no cat
+        //Cat PONCT
+        FRENCH.addTag(new PosTag("PONCT",LexicalCategory.Punctuation));
+        //Cat ET
+        FRENCH.addTag(new PosTag("ET", Pos.Foreign));
+        //Cat A
+        FRENCH.addTag(new PosTag("ADJ",LexicalCategory.Adjective));
+        FRENCH.addTag(new PosTag("ADJWH",LexicalCategory.Adjective));
+        //Cat ADV
+        FRENCH.addTag(new PosTag("ADV",LexicalCategory.Adverb));
+        FRENCH.addTag(new PosTag("ADVWH",LexicalCategory.Adverb));
+        //Cat PRO
+        FRENCH.addTag(new PosTag("PRO",Pos.StrongPersonalPronoun)); //Strong Pronoun
+        FRENCH.addTag(new PosTag("PROWH",Pos.StrongPersonalPronoun)); //Strong Pronoun
+        FRENCH.addTag(new PosTag("PROREL",Pos.StrongPersonalPronoun)); //Strong Pronoun
+        //Cat D
+        FRENCH.addTag(new PosTag("DET",Pos.Determiner));
+        FRENCH.addTag(new PosTag("DETWH",Pos.Determiner));
+        //Cat N
+        FRENCH.addTag(new PosTag("NC", Pos.CommonNoun));
+        FRENCH.addTag(new PosTag("NPP", Pos.ProperNoun));
+        //Cat V
+        FRENCH.addTag(new PosTag("V",Pos.IndicativeVerb));
+        FRENCH.addTag(new PosTag("VIMP",Pos.ImperativeVerb));
+        FRENCH.addTag(new PosTag("VINF",Pos.Infinitive));
+        FRENCH.addTag(new PosTag("VS",Pos.SubjunctiveVerb));
+        FRENCH.addTag(new PosTag("VPP",Pos.PastParticiple));
+        FRENCH.addTag(new PosTag("VPR", Pos.PresentParticiple)); //Verb Present?
+        getInstance().add(FRENCH);
+    }
 }