You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/12/05 18:26:02 UTC
svn commit: r1548212 - in /stanbol/trunk/enhancement-engines/opennlp:
opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/
opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/
Author: rwesten
Date: Thu Dec 5 17:26:01 2013
New Revision: 1548212
URL: http://svn.apache.org/r1548212
Log:
STANBOL-1231: added POS Tag and Phrase Tag mapping for the Treebank+ Tagset in the opennlp-pos and opennlp-chunker engine.
Modified:
stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java
stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java
Modified: stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java?rev=1548212&r1=1548211&r2=1548212&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java (original)
+++ stanbol/trunk/enhancement-engines/opennlp/opennlp-chunker/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/chunker/model/PhraseTagSetRegistry.java Thu Dec 5 17:26:01 2013
@@ -24,6 +24,7 @@ import opennlp.tools.chunker.Chunker;
import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.Pos;
/**
* Registry for {@link PhraseTag} {@link TagSet}s used by OpenNLP
@@ -75,4 +76,22 @@ public class PhraseTagSetRegistry {
DEFAULT.addTag(new PhraseTag("PP", LexicalCategory.PronounOrDeterminer));
getInstance().add(DEFAULT);
}
+
+ public static final TagSet<PhraseTag> FRENCH = new TagSet<PhraseTag>(
+ "French Treebank+ Phrase TagSet", "fr");
+
+ static {
+ FRENCH.addTag(new PhraseTag("AP", LexicalCategory.Adjective));
+ FRENCH.addTag(new PhraseTag("AdP",LexicalCategory.Adverb));
+ FRENCH.addTag(new PhraseTag("COORD",LexicalCategory.Conjuction));
+ FRENCH.addTag(new PhraseTag("NP",LexicalCategory.Noun));
+ FRENCH.addTag(new PhraseTag("PP", LexicalCategory.PronounOrDeterminer));
+ FRENCH.addTag(new PhraseTag("VN",LexicalCategory.Verb));
+ FRENCH.addTag(new PhraseTag("VPinf",LexicalCategory.Verb));
+ FRENCH.addTag(new PhraseTag("VPpart",LexicalCategory.Verb));
+ FRENCH.addTag(new PhraseTag("Ssub"));
+ FRENCH.addTag(new PhraseTag("Srel"));
+ FRENCH.addTag(new PhraseTag("Sint"));
+ getInstance().add(FRENCH);
+ }
}
Modified: stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java?rev=1548212&r1=1548211&r2=1548212&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java (original)
+++ stanbol/trunk/enhancement-engines/opennlp/opennlp-pos/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/pos/model/PosTagSetRegistry.java Thu Dec 5 17:26:01 2013
@@ -228,11 +228,11 @@ public final class PosTagSetRegistry {
SWEDISH.addTag(new PosTag("HV",LexicalCategory.Verb)); //"ha(va)" (have)
SWEDISH.addTag(new PosTag("WV",LexicalCategory.Verb)); //"vilja" (want)
SWEDISH.addTag(new PosTag("QV",LexicalCategory.Verb)); //"kunna" (can)
- SWEDISH.addTag(new PosTag("MV",LexicalCategory.Verb)); //"måste" (must)
+ SWEDISH.addTag(new PosTag("MV",LexicalCategory.Verb)); //"m��ste" (must)
SWEDISH.addTag(new PosTag("KV",LexicalCategory.Verb)); // locution "komma att" (periphrastic future)
SWEDISH.addTag(new PosTag("SV",LexicalCategory.Verb)); //"skola" (will, shall)
- SWEDISH.addTag(new PosTag("GV",LexicalCategory.Verb)); //"göra" (do, make)
- SWEDISH.addTag(new PosTag("FV",LexicalCategory.Verb)); //få" (get)
+ SWEDISH.addTag(new PosTag("GV",LexicalCategory.Verb)); //"g��ra" (do, make)
+ SWEDISH.addTag(new PosTag("FV",LexicalCategory.Verb)); //f��" (get)
SWEDISH.addTag(new PosTag("VV",LexicalCategory.Verb)); //all other verbs
SWEDISH.addTag(new PosTag("TP",Pos.PastParticiple)); //PerfectParticle
SWEDISH.addTag(new PosTag("SP",Pos.PresentParticiple));
@@ -257,5 +257,56 @@ public final class PosTagSetRegistry {
SWEDISH.addTag(new PosTag("XX"));
getInstance().add(SWEDISH);
}
-
+ /**
+ * POS tags used by the French Treebank as described in
+ * <a href="http://alpage.inria.fr/statgram/frdep/Publications/crabbecandi-taln2008-final.pdf">
+ * Expériences d'analyse syntaxique statistique du français</a> page 8.<p>
+ * Note that this Tagset was originally introduced by Crabbé & Candito, 2008
+ * but the linked paper contains a nice tabular overview of it.
+ */
+ public static final TagSet<PosTag> FRENCH = new TagSet<PosTag>("Treebank+ French","fr");
+
+ static {
+ //Cat C
+ FRENCH.addTag(new PosTag("CS",Pos.SubordinatingConjunction));
+ FRENCH.addTag(new PosTag("CC",Pos.CoordinatingConjunction));
+ //Cat CL
+ FRENCH.addTag(new PosTag("CLO", Pos.PersonalPronoun)); //Clitic
+ FRENCH.addTag(new PosTag("CLS", Pos.PersonalPronoun)); //Clitic
+ FRENCH.addTag(new PosTag("CLR", Pos.PersonalPronoun)); //Clitic
+ //Cat P
+ FRENCH.addTag(new PosTag("P",Pos.Preposition));
+ FRENCH.addTag(new PosTag("P+D")); //no cat
+ FRENCH.addTag(new PosTag("P+PRO")); //no cat
+ //Cat I
+ FRENCH.addTag(new PosTag("I", LexicalCategory.Interjection)); //no cat
+ //Cat PONCT
+ FRENCH.addTag(new PosTag("PONCT",LexicalCategory.Punctuation));
+ //Cat ET
+ FRENCH.addTag(new PosTag("ET", Pos.Foreign));
+ //Cat A
+ FRENCH.addTag(new PosTag("ADJ",LexicalCategory.Adjective));
+ FRENCH.addTag(new PosTag("ADJWH",LexicalCategory.Adjective));
+ //Cat ADV
+ FRENCH.addTag(new PosTag("ADV",LexicalCategory.Adverb));
+ FRENCH.addTag(new PosTag("ADVWH",LexicalCategory.Adverb));
+ //Cat PRO
+ FRENCH.addTag(new PosTag("PRO",Pos.StrongPersonalPronoun)); //Strong Pronoun
+ FRENCH.addTag(new PosTag("PROWH",Pos.StrongPersonalPronoun)); //Strong Pronoun
+ FRENCH.addTag(new PosTag("PROREL",Pos.StrongPersonalPronoun)); //Strong Pronoun
+ //Cat D
+ FRENCH.addTag(new PosTag("DET",Pos.Determiner));
+ FRENCH.addTag(new PosTag("DETWH",Pos.Determiner));
+ //Cat N
+ FRENCH.addTag(new PosTag("NC", Pos.CommonNoun));
+ FRENCH.addTag(new PosTag("NPP", Pos.ProperNoun));
+ //Cat V
+ FRENCH.addTag(new PosTag("V",Pos.IndicativeVerb));
+ FRENCH.addTag(new PosTag("VIMP",Pos.ImperativeVerb));
+ FRENCH.addTag(new PosTag("VINF",Pos.Infinitive));
+ FRENCH.addTag(new PosTag("VS",Pos.SubjunctiveVerb));
+ FRENCH.addTag(new PosTag("VPP",Pos.PastParticiple));
+ FRENCH.addTag(new PosTag("VPR", Pos.PresentParticiple)); //Verb Present?
+ getInstance().add(FRENCH);
+ }
}