You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/10/18 11:51:38 UTC
svn commit: r1399568 - in
/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src:
main/java/org/apache/stanbol/enhancer/engines/celi/
main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/
test/java/org/apache/stanbol/enhancer/...
Author: rwesten
Date: Thu Oct 18 09:51:37 2012
New Revision: 1399568
URL: http://svn.apache.org/viewvc?rev=1399568&view=rev
Log:
STANBOL-733: Applied the patch provided by Alessio
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
Added: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java?rev=1399568&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java Thu Oct 18 09:51:37 2012
@@ -0,0 +1,196 @@
+package org.apache.stanbol.enhancer.engines.celi;
+
+import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.clerezza.rdf.core.Language;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.NonLiteral;
+import org.apache.clerezza.rdf.core.Triple;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.commons.io.IOUtils;
+import org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
+import org.apache.stanbol.enhancer.nlp.morpho.Case;
+import org.apache.stanbol.enhancer.nlp.morpho.Definitness;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.morpho.Person;
+import org.apache.stanbol.enhancer.nlp.morpho.Tense;
+import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Represents a morphological interpretation of a {@link Token word}. Words
+ * might have different interpretations (typically depending on the POS), so
+ * this class allows information about all possible interpretations to be
+ * attached to a single word. This is needed if no POS information is present
+ * or if POS tags are ambiguous or of low confidence.
+ * <p>
+ * An instance consists of a mandatory lemma plus one set per feature kind
+ * (lexical category, gender, number, case, person, definiteness, verb mood
+ * and tense). Instances are mutable through the <code>add*</code> methods and
+ * are not synchronized.
+ * <p>
+ * <b>TODO</b>s:
+ * <ul>
+ * <li>I would like to have {@link Case}, {@link Tense}, ... as own
+ * Annotations. However AFAIK those are all grouped to a single interpretation
+ * of the Token (driven by the POS tag).</li>
+ * <li>Maybe add a possibility to add unmapped information as
+ * <code>Map&lt;String,List&lt;String&gt;&gt;</code></li>
+ * </ul>
+ *
+ * @author Alessio Bosca
+ *
+ */
+public class CeliMorphoFeatures {
+
+    public static final UriRef HAS_NUMBER = new UriRef("http://purl.org/olia/olia.owl#hasNumber");
+    public static final UriRef HAS_GENDER = new UriRef("http://purl.org/olia/olia.owl#hasGender");
+    public static final UriRef HAS_PERSON = new UriRef("http://purl.org/olia/olia.owl#hasPerson");
+    public static final UriRef HAS_CASE = new UriRef("http://purl.org/olia/olia.owl#hasCase");
+    public static final UriRef HAS_DEFINITENESS = new UriRef("http://purl.org/olia/olia.owl#hasDefiniteness");
+    public static final UriRef HAS_MOOD = new UriRef("http://purl.org/olia/olia.owl#hasMood");
+    public static final UriRef HAS_TENSE = new UriRef("http://purl.org/olia/olia.owl#hasTense");
+
+    private static final Logger log = LoggerFactory.getLogger(CeliMorphoFeatures.class);
+
+    /** The lemma of this interpretation; never <code>null</code>. */
+    private final String lemma;
+
+    // The feature sets are created eagerly and never reassigned, so they are
+    // never null. equals(), hashCode() and toString() rely on this.
+    private final Set<LexicalCategory> posSet = new HashSet<LexicalCategory>();
+    private final Set<Gender> genderSet = new HashSet<Gender>();
+    private final Set<NumberFeature> numberSet = new HashSet<NumberFeature>();
+    private final Set<Case> caseFeatureSet = new HashSet<Case>();
+    private final Set<Person> personSet = new HashSet<Person>();
+    private final Set<Definitness> definitnessSet = new HashSet<Definitness>();
+    private final Set<VerbMood> verbFormSet = new HashSet<VerbMood>();
+    private final Set<Tense> tenseSet = new HashSet<Tense>();
+
+    /**
+     * Creates an interpretation for the given lemma with no features set.
+     *
+     * @param lemma the lemma; MUST NOT be <code>null</code>
+     * @throws IllegalArgumentException if <code>lemma</code> is <code>null</code>
+     */
+    public CeliMorphoFeatures(String lemma) {
+        if (lemma == null) {
+            throw new IllegalArgumentException("The parsed lemma MUST NOT be NULL!");
+        }
+        this.lemma = lemma;
+    }
+
+    /**
+     * Two interpretations are equal if they have the same lemma and the same
+     * content in every feature set, including the lexical categories.
+     * The lexical categories must take part in the comparison because
+     * {@link #hashCode()} uses them too; otherwise equal objects could
+     * produce different hash codes.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof CeliMorphoFeatures)) {
+            return false;
+        }
+        CeliMorphoFeatures other = (CeliMorphoFeatures) o;
+        return lemma.equals(other.lemma)
+                && posSet.equals(other.posSet)
+                && genderSet.equals(other.genderSet)
+                && numberSet.equals(other.numberSet)
+                && caseFeatureSet.equals(other.caseFeatureSet)
+                && personSet.equals(other.personSet)
+                && definitnessSet.equals(other.definitnessSet)
+                && verbFormSet.equals(other.verbFormSet)
+                && tenseSet.equals(other.tenseSet);
+    }
+
+    /**
+     * Hash code over exactly the state compared by {@link #equals(Object)}:
+     * the lemma and all eight feature sets.
+     */
+    @Override
+    public int hashCode() {
+        return lemma.hashCode()
+                + posSet.hashCode()
+                + genderSet.hashCode()
+                + numberSet.hashCode()
+                + caseFeatureSet.hashCode()
+                + personSet.hashCode()
+                + definitnessSet.hashCode()
+                + verbFormSet.hashCode()
+                + tenseSet.hashCode();
+    }
+
+    /** Adds a grammatical case to this interpretation. */
+    public final void addCase(Case caseFeature) {
+        this.caseFeatureSet.add(caseFeature);
+    }
+
+    /** Adds a definiteness value to this interpretation. */
+    public final void addDefinitness(Definitness definitness) {
+        this.definitnessSet.add(definitness);
+    }
+
+    /** Adds a gender to this interpretation. */
+    public final void addGender(Gender gender) {
+        this.genderSet.add(gender);
+    }
+
+    /** Adds a grammatical number to this interpretation. */
+    public final void addNumber(NumberFeature number) {
+        this.numberSet.add(number);
+    }
+
+    /** Adds a grammatical person to this interpretation. */
+    public void addPerson(Person person) {
+        this.personSet.add(person);
+    }
+
+    /** Adds a lexical category (part of speech) to this interpretation. */
+    public void addPos(LexicalCategory pos) {
+        this.posSet.add(pos);
+    }
+
+    /** Adds a tense to this interpretation. */
+    public void addTense(Tense tense) {
+        this.tenseSet.add(tense);
+    }
+
+    /** Adds a verb mood to this interpretation. */
+    public void addVerbForm(VerbMood verbForm) {
+        this.verbFormSet.add(verbForm);
+    }
+
+    /**
+     * Returns <code>MorphoTag(lemma|kind:value|...)</code>, listing every
+     * feature value prefixed by its kind. The order of the kinds is fixed;
+     * the order of the values within one kind follows set iteration order.
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("MorphoTag(");
+        sb.append(lemma);
+        appendAll(sb, "|pos:", posSet);
+        appendAll(sb, "|gender:", genderSet);
+        appendAll(sb, "|number:", numberSet);
+        appendAll(sb, "|person:", personSet);
+        appendAll(sb, "|definitness:", definitnessSet);
+        appendAll(sb, "|case:", caseFeatureSet);
+        appendAll(sb, "|verbForm:", verbFormSet);
+        appendAll(sb, "|tense:", tenseSet);
+        sb.append(')');
+        return sb.toString();
+    }
+
+    /** Appends <code>label</code> followed by the value, once per element of <code>values</code>. */
+    private static void appendAll(StringBuilder sb, String label, Set<?> values) {
+        for (Object value : values) {
+            sb.append(label).append(value);
+        }
+    }
+
+    /**
+     * Converts this interpretation to RDF triples that all share the given
+     * subject.
+     * <p>
+     * The result always contains one
+     * {@link CeliLemmatizerEnhancementEngine#hasLemmaForm} triple holding the
+     * lemma as a plain literal, followed by one triple per feature value:
+     * {@link #HAS_NUMBER}, {@link #HAS_PERSON}, {@link #HAS_GENDER},
+     * {@link #HAS_DEFINITENESS}, {@link #HAS_CASE}, {@link #HAS_MOOD} and
+     * {@link #HAS_TENSE} for the morphological features, and
+     * <code>rdf:type</code> for each lexical category.
+     *
+     * @param textAnnotation the subject used for every created triple
+     * @param lang the language passed to the lemma literal
+     * @return a newly created collection with the triples
+     */
+    public Collection<? extends Triple> featuresAsTriples(UriRef textAnnotation, Language lang) {
+        // A method-local result needs no synchronization, so a plain
+        // ArrayList replaces the legacy Vector.
+        Collection<Triple> result = new ArrayList<Triple>();
+        result.add(new TripleImpl(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm,
+                new PlainLiteralImpl(this.lemma, lang)));
+        for (LexicalCategory pos : posSet) {
+            result.add(new TripleImpl(textAnnotation, RDF_TYPE, pos.getUri()));
+        }
+        for (NumberFeature num : numberSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_NUMBER, num.getUri()));
+        }
+        for (Person pers : personSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_PERSON, pers.getUri()));
+        }
+        for (Gender gender : genderSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_GENDER, gender.getUri()));
+        }
+        for (Definitness def : definitnessSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, def.getUri()));
+        }
+        for (Case caseFeat : caseFeatureSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_CASE, caseFeat.getUri()));
+        }
+        for (VerbMood vf : verbFormSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_MOOD, vf.getUri()));
+        }
+        for (Tense tense : tenseSet) {
+            result.add(new TripleImpl(textAnnotation, HAS_TENSE, tense.getUri()));
+        }
+        return result;
+    }
+}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java?rev=1399568&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java Thu Oct 18 09:51:37 2012
@@ -0,0 +1,465 @@
+package org.apache.stanbol.enhancer.engines.celi;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.morpho.Case;
+import org.apache.stanbol.enhancer.nlp.morpho.CaseTag;
+import org.apache.stanbol.enhancer.nlp.morpho.Definitness;
+import org.apache.stanbol.enhancer.nlp.morpho.DefinitnessTag;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.GenderTag;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberTag;
+import org.apache.stanbol.enhancer.nlp.morpho.Person;
+import org.apache.stanbol.enhancer.nlp.morpho.PersonTag;
+import org.apache.stanbol.enhancer.nlp.morpho.Tense;
+import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
+import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
+import org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
+import org.apache.stanbol.enhancer.nlp.pos.PosTag;
+
+/**
+ * {@link TagSet}s for known CELI (linguagrid.org) POS and morphological
+ * features models.
+ * <p>
+ * The registry is a singleton obtained via {@link #getInstance()}. All tag
+ * sets are registered by the static initializers of this class; the
+ * registration methods are private, so no entries are added afterwards.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public final class CeliTagSetRegistry {
+
+    // Must stay the FIRST static member: the static initializer blocks below
+    // call getInstance() and static initialization runs in textual order.
+    private static final CeliTagSetRegistry instance = new CeliTagSetRegistry();
+
+    private CeliTagSetRegistry(){}
+
+    private final Map<String, TagSet<PosTag>> posMappingsByLanguage = new HashMap<String, TagSet<PosTag>>();
+    private final Map<String, TagSet<GenderTag>> genderMappingsByLanguage = new HashMap<String, TagSet<GenderTag>>();
+    private final Map<String, TagSet<NumberTag>> numberMappingsByLanguage = new HashMap<String, TagSet<NumberTag>>();
+    private final Map<String, TagSet<PersonTag>> personMappingsByLanguage = new HashMap<String, TagSet<PersonTag>>();
+    private final Map<String, TagSet<CaseTag>> caseMappingsByLanguage = new HashMap<String, TagSet<CaseTag>>();
+    private final Map<String, TagSet<DefinitnessTag>> definitenessMappingsByLanguage = new HashMap<String, TagSet<DefinitnessTag>>();
+    private final Map<String, TagSet<VerbMoodTag>> verbFormMappingsByLanguage = new HashMap<String, TagSet<VerbMoodTag>>();
+    private final Map<String, TagSet<TenseTag>> tenseMappingsByLanguage = new HashMap<String, TagSet<TenseTag>>();
+
+
+    /**
+     * Getter for the singleton instance of this registry.
+     * @return the registry
+     */
+    public static CeliTagSetRegistry getInstance(){
+        return instance;
+    }
+
+    /**
+     * Creates the exception thrown when a second tag set of the same kind is
+     * registered for a language. Shared by all <code>add*Tagset</code>
+     * methods so that the message is defined in a single place.
+     * @param lang the language that already has a tag set registered
+     * @return the exception to throw
+     */
+    private static IllegalStateException duplicateModel(String lang) {
+        return new IllegalStateException("Multiple Models for Language '"
+                + lang + "'! This is an error in the static configuration of "
+                + "this class. Please report this to the stanbol-dev mailing "
+                + "list!");
+    }
+
+    /**
+     * Registers the POS {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * POS tag set registered
+     */
+    private void addPosTagset(TagSet<PosTag> model) {
+        for(String lang : model.getLanguages()){
+            if(posMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+
+    /**
+     * Getter for the POS {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<PosTag> getPosTagset(String language){
+        return posMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the gender {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * gender tag set registered
+     */
+    private void addGenderTagset(TagSet<GenderTag> model) {
+        for(String lang : model.getLanguages()){
+            if(genderMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the gender {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<GenderTag> getGenderTagset(String language){
+        return genderMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the number {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * number tag set registered
+     */
+    private void addNumberTagset(TagSet<NumberTag> model) {
+        for(String lang : model.getLanguages()){
+            if(numberMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the number {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<NumberTag> getNumberTagset(String language){
+        return numberMappingsByLanguage.get(language);
+    }
+
+
+    /**
+     * Registers the person {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * person tag set registered
+     */
+    private void addPersonTagset(TagSet<PersonTag> model) {
+        for(String lang : model.getLanguages()){
+            if(personMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the person {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<PersonTag> getPersonTagset(String language){
+        return personMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the case {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * case tag set registered
+     */
+    private void addCaseTagset(TagSet<CaseTag> model) {
+        for(String lang : model.getLanguages()){
+            if(caseMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the case {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<CaseTag> getCaseTagset(String language){
+        return caseMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the definiteness {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * definiteness tag set registered
+     */
+    private void addDefinitnessTagset(TagSet<DefinitnessTag> model) {
+        for(String lang : model.getLanguages()){
+            if(definitenessMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the definiteness {@link TagSet} by language. If no
+     * {@link TagSet} is available for a language this will return
+     * <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<DefinitnessTag> getDefinitnessTagset(String language){
+        return definitenessMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the verb form {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * verb form tag set registered
+     */
+    private void addVerbFormTagset(TagSet<VerbMoodTag> model) {
+        for(String lang : model.getLanguages()){
+            if(verbFormMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the verb form {@link TagSet} by language. If no
+     * {@link TagSet} is available for a language this will return
+     * <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<VerbMoodTag> getVerbFormTagset(String language){
+        return verbFormMappingsByLanguage.get(language);
+    }
+
+    /**
+     * Registers the tense {@link TagSet} for all languages it declares.
+     * @param model the tag set to register
+     * @throws IllegalStateException if one of the languages already has a
+     * tense tag set registered
+     */
+    private void addTenseTagset(TagSet<TenseTag> model) {
+        for(String lang : model.getLanguages()){
+            if(tenseMappingsByLanguage.put(lang, model) != null){
+                throw duplicateModel(lang);
+            }
+        }
+    }
+
+    /**
+     * Getter for the tense {@link TagSet} by language. If no {@link TagSet}
+     * is available for a language this will return <code>null</code>
+     * @param language the language
+     * @return the tag set or <code>null</code> if none is defined
+     */
+    public TagSet<TenseTag> getTenseTagset(String language){
+        return tenseMappingsByLanguage.get(language);
+    }
+
+
+    /*****************************************************************
+     * POS TAGSETS MAPPINGS TO OLIA ONTOLOGY                         *
+     ****************************************************************/
+
+
+    public static final TagSet<PosTag> ITALIAN = new TagSet<PosTag>("CELI Italian POS tags", "it");
+    static {
+        ITALIAN.addTag(new PosTag("N", LexicalCategory.Noun));
+        ITALIAN.addTag(new PosTag("NF", LexicalCategory.Noun));
+        ITALIAN.addTag(new PosTag("NM", LexicalCategory.Noun));
+        ITALIAN.addTag(new PosTag("ADJ", LexicalCategory.Adjective));
+        ITALIAN.addTag(new PosTag("ADV", LexicalCategory.Adverb));
+        ITALIAN.addTag(new PosTag("ART", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("PRON", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("PRONWH", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("DETDEMO", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("DETINDEF", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("DETPOSS", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("DETWH", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("CHEWH", LexicalCategory.PronounOrDeterminer));
+        ITALIAN.addTag(new PosTag("CLI", LexicalCategory.Clitic));
+        ITALIAN.addTag(new PosTag("CONJ", LexicalCategory.Conjuction));
+        ITALIAN.addTag(new PosTag("CONGWH", LexicalCategory.Conjuction));
+        ITALIAN.addTag(new PosTag("PREP", LexicalCategory.Adposition));
+        ITALIAN.addTag(new PosTag("V", LexicalCategory.Verb));
+        ITALIAN.addTag(new PosTag("INT", LexicalCategory.Interjection));
+        ITALIAN.addTag(new PosTag("NEG", LexicalCategory.Adverb));
+        ITALIAN.addTag(new PosTag("NUM", LexicalCategory.Numeral));
+        getInstance().addPosTagset(ITALIAN);
+    }
+
+
+    public static final TagSet<PosTag> GERMAN = new TagSet<PosTag>("CELI German POS tags", "de");
+    static {
+        GERMAN.addTag(new PosTag("N", LexicalCategory.Noun));
+        GERMAN.addTag(new PosTag("ADJ", LexicalCategory.Adjective));
+        GERMAN.addTag(new PosTag("ADV", LexicalCategory.Adverb));
+        GERMAN.addTag(new PosTag("ART", LexicalCategory.PronounOrDeterminer));
+        GERMAN.addTag(new PosTag("PRON", LexicalCategory.PronounOrDeterminer));
+        GERMAN.addTag(new PosTag("CONJ", LexicalCategory.Conjuction));
+        GERMAN.addTag(new PosTag("PREP", LexicalCategory.Adposition));
+        GERMAN.addTag(new PosTag("PREPART", LexicalCategory.Adposition));
+        GERMAN.addTag(new PosTag("V", LexicalCategory.Verb));
+        GERMAN.addTag(new PosTag("INT", LexicalCategory.Interjection));
+        GERMAN.addTag(new PosTag("NUM", LexicalCategory.Numeral));
+        getInstance().addPosTagset(GERMAN);
+    }
+
+    public static final TagSet<PosTag> DANISH = new TagSet<PosTag>("CELI Danish POS tags", "da");
+    static {
+        DANISH.addTag(new PosTag("N", LexicalCategory.Noun));
+        DANISH.addTag(new PosTag("NF", LexicalCategory.Noun));
+        DANISH.addTag(new PosTag("NN", LexicalCategory.Noun));
+        DANISH.addTag(new PosTag("NP", LexicalCategory.ProperNoun));
+        DANISH.addTag(new PosTag("ADJ", LexicalCategory.Adjective));
+        DANISH.addTag(new PosTag("ADV", LexicalCategory.Adverb));
+        DANISH.addTag(new PosTag("ART", LexicalCategory.PronounOrDeterminer));
+        DANISH.addTag(new PosTag("PRON", LexicalCategory.PronounOrDeterminer));
+        DANISH.addTag(new PosTag("CONJ", LexicalCategory.Conjuction));
+        DANISH.addTag(new PosTag("PREP", LexicalCategory.Adposition));
+        DANISH.addTag(new PosTag("V", LexicalCategory.Verb));
+        DANISH.addTag(new PosTag("INT", LexicalCategory.Interjection));
+        DANISH.addTag(new PosTag("INVAR", LexicalCategory.Residual));
+        DANISH.addTag(new PosTag("NUM", LexicalCategory.Numeral));
+        getInstance().addPosTagset(DANISH);
+    }
+
+    public static final TagSet<PosTag> RUSSIAN = new TagSet<PosTag>("CELI Russian POS tags", "ru");
+    static {
+        RUSSIAN.addTag(new PosTag("N", LexicalCategory.Noun));
+        RUSSIAN.addTag(new PosTag("NF", LexicalCategory.Noun));
+        RUSSIAN.addTag(new PosTag("NM", LexicalCategory.Noun));
+        RUSSIAN.addTag(new PosTag("NN", LexicalCategory.Noun));
+        RUSSIAN.addTag(new PosTag("NP", LexicalCategory.ProperNoun));
+        RUSSIAN.addTag(new PosTag("NPLUR", LexicalCategory.Noun));
+        RUSSIAN.addTag(new PosTag("ADJ", LexicalCategory.Adjective));
+        RUSSIAN.addTag(new PosTag("ADV", LexicalCategory.Adverb));
+        RUSSIAN.addTag(new PosTag("PRON", LexicalCategory.PronounOrDeterminer));
+        RUSSIAN.addTag(new PosTag("CONJ", LexicalCategory.Conjuction));
+        RUSSIAN.addTag(new PosTag("PREP", LexicalCategory.Adposition));
+        RUSSIAN.addTag(new PosTag("V", LexicalCategory.Verb));
+        RUSSIAN.addTag(new PosTag("INT", LexicalCategory.Interjection));
+        RUSSIAN.addTag(new PosTag("NUM", LexicalCategory.Numeral));
+        RUSSIAN.addTag(new PosTag("NUMNON", LexicalCategory.PronounOrDeterminer));
+        RUSSIAN.addTag(new PosTag("PART", LexicalCategory.Unique));
+        getInstance().addPosTagset(RUSSIAN);
+    }
+
+    public static final TagSet<PosTag> ROMANIAN = new TagSet<PosTag>("CELI Romanian POS tags", "ro");
+    static {
+        ROMANIAN.addTag(new PosTag("N", LexicalCategory.Noun));
+        ROMANIAN.addTag(new PosTag("AJ", LexicalCategory.Adjective));
+        ROMANIAN.addTag(new PosTag("AV", LexicalCategory.Adverb));
+        ROMANIAN.addTag(new PosTag("AT", LexicalCategory.PronounOrDeterminer));
+        ROMANIAN.addTag(new PosTag("PD", LexicalCategory.PronounOrDeterminer));
+        ROMANIAN.addTag(new PosTag("C", LexicalCategory.Conjuction));
+        ROMANIAN.addTag(new PosTag("AP", LexicalCategory.Adposition));
+        ROMANIAN.addTag(new PosTag("V", LexicalCategory.Verb));
+        ROMANIAN.addTag(new PosTag("I", LexicalCategory.Interjection));
+        ROMANIAN.addTag(new PosTag("INVAR", LexicalCategory.Residual));
+        ROMANIAN.addTag(new PosTag("NU", LexicalCategory.Numeral));
+        ROMANIAN.addTag(new PosTag("R", LexicalCategory.Residual));
+        ROMANIAN.addTag(new PosTag("U", LexicalCategory.Unique));
+        getInstance().addPosTagset(ROMANIAN);
+    }
+
+    /*****************************************************************
+     * MORPHOLOGICAL FEATURES TAGSETS MAPPINGS TO OLIA ONTOLOGY:
+     * GENDER, NUMBER, PERSON, CASE, DEFINITENESS, VERB_FORM, TENSE
+     ****************************************************************/
+
+    public static final TagSet<GenderTag> GENDER = new TagSet<GenderTag>("CELI GENDER tags", "da","de","it","ro","ru");
+    static {
+        GENDER.addTag(new GenderTag("F", Gender.Feminine));
+        GENDER.addTag(new GenderTag("FEM", Gender.Feminine));
+        GENDER.addTag(new GenderTag("M", Gender.Masculine));
+        GENDER.addTag(new GenderTag("MAS", Gender.Masculine));
+        GENDER.addTag(new GenderTag("MASC", Gender.Masculine));
+        GENDER.addTag(new GenderTag("NE", Gender.Neuter));
+        GENDER.addTag(new GenderTag("NEU", Gender.Neuter));
+        GENDER.addTag(new GenderTag("UTR", Gender.Common));
+        getInstance().addGenderTagset(GENDER);
+    }
+
+
+    public static final TagSet<NumberTag> NUMBER = new TagSet<NumberTag>("CELI NUMBER tags", "da","de","it","ro","ru");
+    static {
+        NUMBER.addTag(new NumberTag("SGL", NumberFeature.Singular));
+        NUMBER.addTag(new NumberTag("SIN", NumberFeature.Singular));
+        NUMBER.addTag(new NumberTag("SING", NumberFeature.Singular));
+        NUMBER.addTag(new NumberTag("PLU", NumberFeature.Plural));
+        getInstance().addNumberTagset(NUMBER);
+    }
+
+    public static final TagSet<PersonTag> PERSON = new TagSet<PersonTag>("CELI PERSON tags", "da","de","it","ro","ru");
+    static {
+        PERSON.addTag(new PersonTag("FIRST", Person.First));
+        PERSON.addTag(new PersonTag("SECOND", Person.Second));
+        PERSON.addTag(new PersonTag("THIRD", Person.Third));
+        getInstance().addPersonTagset(PERSON);
+    }
+
+    public static final TagSet<CaseTag> CASE = new TagSet<CaseTag>("CELI CASE tags", "da","de","it","ro","ru");
+    static {
+        CASE.addTag(new CaseTag("NOMORPH", Case.Uninflected));
+        CASE.addTag(new CaseTag("NOM", Case.Nominative));
+        CASE.addTag(new CaseTag("GEN", Case.Genitive));
+        CASE.addTag(new CaseTag("GEN2", Case.Genitive));
+        CASE.addTag(new CaseTag("ACC", Case.Accusative));
+        CASE.addTag(new CaseTag("ACCAN", Case.Accusative));
+        CASE.addTag(new CaseTag("ACCNAN", Case.Accusative));
+        CASE.addTag(new CaseTag("ACC2", Case.Accusative));
+        CASE.addTag(new CaseTag("DAT", Case.Dative));
+        // NOTE(review): the sibling tags use a single '2' suffix (GEN2, ACC2,
+        // INS2) - confirm against the CELI tag set that "DAT22" is not a
+        // typo for "DAT2".
+        CASE.addTag(new CaseTag("DAT22", Case.Dative));
+        CASE.addTag(new CaseTag("INS", Case.Instrumental));
+        CASE.addTag(new CaseTag("INS2", Case.Instrumental));
+        CASE.addTag(new CaseTag("LOC", Case.Locative));
+        getInstance().addCaseTagset(CASE);
+    }
+
+    public static final TagSet<DefinitnessTag> DEFINITNESS = new TagSet<DefinitnessTag>("CELI DEFINITNESS tags", "da","de","it","ro","ru");
+    static {
+        DEFINITNESS.addTag(new DefinitnessTag("DEF", Definitness.Definite));
+        DEFINITNESS.addTag(new DefinitnessTag("INDEF", Definitness.Indefinite));
+        getInstance().addDefinitnessTagset(DEFINITNESS);
+    }
+
+    public static final TagSet<VerbMoodTag> VERB_FORM = new TagSet<VerbMoodTag>("CELI VERB FORM tags", "da","de","it","ro","ru");
+    static {
+        VERB_FORM.addTag(new VerbMoodTag("GERUND", VerbMood.Gerund));
+        // NOTE(review): spelling "GEROUNDPRS" kept as committed - confirm
+        // against the CELI tag set.
+        VERB_FORM.addTag(new VerbMoodTag("GEROUNDPRS", VerbMood.Gerund));
+        VERB_FORM.addTag(new VerbMoodTag("IMPERATIVE", VerbMood.ImperativeVerb));
+        VERB_FORM.addTag(new VerbMoodTag("IMP", VerbMood.ImperativeVerb));
+        VERB_FORM.addTag(new VerbMoodTag("INDIC", VerbMood.IndicativeVerb));
+        VERB_FORM.addTag(new VerbMoodTag("IND", VerbMood.IndicativeVerb));
+        VERB_FORM.addTag(new VerbMoodTag("CONGIUNT", VerbMood.SubjunctiveVerb));
+        VERB_FORM.addTag(new VerbMoodTag("SUBJ", VerbMood.SubjunctiveVerb));
+        VERB_FORM.addTag(new VerbMoodTag("SUB", VerbMood.SubjunctiveVerb));
+        VERB_FORM.addTag(new VerbMoodTag("INFIN", VerbMood.Infinitive));
+        VERB_FORM.addTag(new VerbMoodTag("INF", VerbMood.Infinitive));
+        VERB_FORM.addTag(new VerbMoodTag("PASTPART", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PASPART", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PCPRF", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PRESPART", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PRSPART", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PCPRS", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PART", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("PTC", VerbMood.Participle));
+        VERB_FORM.addTag(new VerbMoodTag("CONDIZ", VerbMood.ConditionalVerb));
+        VERB_FORM.addTag(new VerbMoodTag("SUP", VerbMood.Supine));
+        getInstance().addVerbFormTagset(VERB_FORM);
+    }
+
+
+
+    public static final TagSet<TenseTag> TENSE = new TagSet<TenseTag>("CELI TENSE tags", "da","de","it","ro","ru");
+    static {
+        TENSE.addTag(new TenseTag("PRS", Tense.Present));
+        TENSE.addTag(new TenseTag("PRES", Tense.Present));
+        TENSE.addTag(new TenseTag("IMPER", Tense.Imperfect));
+        TENSE.addTag(new TenseTag("PER", Tense.Perfect));
+        TENSE.addTag(new TenseTag("PASSREM", Tense.RemotePast));
+        TENSE.addTag(new TenseTag("PSTPER", Tense.PastPerfect));
+        TENSE.addTag(new TenseTag("PST", Tense.Past));
+        TENSE.addTag(new TenseTag("FUT", Tense.Future));
+        TENSE.addTag(new TenseTag("PCPRF", Tense.Past));
+        TENSE.addTag(new TenseTag("PCPRS", Tense.Present));
+        TENSE.addTag(new TenseTag("PRT", Tense.Past));
+        TENSE.addTag(new TenseTag("PRSFUT", Tense.Present));
+        getInstance().addTenseTagset(TENSE);
+    }
+
+}
\ No newline at end of file
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java?rev=1399568&r1=1399567&r2=1399568&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/PosTagSetRegistry.java Thu Oct 18 09:51:37 2012
@@ -1,73 +0,0 @@
-package org.apache.stanbol.enhancer.engines.celi;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
-import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
-import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
-import org.apache.stanbol.enhancer.nlp.pos.PosTag;
-import org.apache.stanbol.enhancer.nlp.pos.olia.English;
-import org.apache.stanbol.enhancer.nlp.pos.olia.German;
-import org.apache.stanbol.enhancer.nlp.pos.olia.Spanish;
-
-/**
- * {@link TagSet}s for known CELI (linguagrid.org) POS models.<p>
- *
- * @author Rupert Westenthaler
- *
- */
-public final class PosTagSetRegistry {
-
- private static PosTagSetRegistry instance = new PosTagSetRegistry();
-
- private PosTagSetRegistry(){}
-
- private final Map<String, TagSet<PosTag>> models = new HashMap<String,TagSet<PosTag>>();
-
- public static PosTagSetRegistry getInstance(){
- return instance;
- }
-
- private void add(TagSet<PosTag> model) {
- for(String lang : model.getLanguages()){
- if(models.put(lang, model) != null){
- throw new IllegalStateException("Multiple Models for Language '"
- + lang+"'! This is an error in the static confituration of "
- + "this class. Please report this to the stanbol-dev mailing"
- + "list!");
- }
- }
- }
- /**
- * Getter for the {@link TagSet} by language. If no {@link TagSet}
- * is available for an Language this will return <code>null</code>
- * @param language the language
- * @return the AnnotationModel or <code>null</code> if non is defined
- */
- public TagSet<PosTag> getTagSet(String language){
- return models.get(language);
- }
-
- /**
- * TODO: create correct POS TagSets for the Languages supported by CELI
- * This creates a default set for all languages supported by the
- * CELI lemmatizer Engine
- */
- public static final TagSet<PosTag> ITALIEN = new TagSet<PosTag>("CELI Italien","it");
-
- static {
- ITALIEN.addTag(new PosTag("ADJ",LexicalCategory.Adjective));
- ITALIEN.addTag(new PosTag("ADV",LexicalCategory.Adverb));
- ITALIEN.addTag(new PosTag("ART",LexicalCategory.PronounOrDeterminer));
- ITALIEN.addTag(new PosTag("CLI")); //mapping ??
- ITALIEN.addTag(new PosTag("CONJ",LexicalCategory.Conjuction));
- ITALIEN.addTag(new PosTag("PREP",LexicalCategory.Adposition));
- ITALIEN.addTag(new PosTag("NF",LexicalCategory.Noun));
- ITALIEN.addTag(new PosTag("NM",LexicalCategory.Noun));
- ITALIEN.addTag(new PosTag("V",LexicalCategory.Verb));
- //add the PosSet to the registry
- getInstance().add(ITALIEN);
- }
-
-}
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java?rev=1399568&r1=1399567&r2=1399568&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngine.java Thu Oct 18 09:51:37 2012
@@ -50,7 +50,18 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.engines.celi.CeliConstants;
+import org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures;
+import org.apache.stanbol.enhancer.engines.celi.CeliTagSetRegistry;
import org.apache.stanbol.enhancer.engines.celi.utils.Utils;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.nlp.morpho.Case;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.morpho.Person;
+import org.apache.stanbol.enhancer.nlp.morpho.Tense;
+import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
+import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -67,22 +78,16 @@ import org.slf4j.LoggerFactory;
@Component(immediate = true, metatype = true)
@Service
-@Properties(value = {
- @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLemmatizer"),
- @Property(name = CeliConstants.CELI_LICENSE),
- @Property(name = CeliConstants.CELI_TEST_ACCOUNT,boolValue=false)
-})
+@Properties(value = { @Property(name = EnhancementEngine.PROPERTY_NAME, value = "celiLemmatizer"), @Property(name = CeliConstants.CELI_LICENSE), @Property(name = CeliConstants.CELI_TEST_ACCOUNT, boolValue = false) })
public class CeliLemmatizerEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
-
+ // TODO: check if it is OK to define new properties in the FISE namespace
+ public static final UriRef hasLemmaForm = new UriRef("http://fise.iks-project.eu/ontology/hasLemmaForm");
+
/**
- * This ensures that no connections to external services are made if Stanbol is started in offline mode
- * as the OnlineMode service will only be available if OfflineMode is deactivated.
+ * This ensures that no connections to external services are made if Stanbol is started in offline mode as the OnlineMode service will only be available if OfflineMode is deactivated.
*/
@Reference
- private OnlineMode onlineMode;
- //TODO: check if it is OK to define new properties in the FISE namespace
- public static final UriRef hasLemmaForm = new UriRef("http://fise.iks-project.eu/ontology/hasLemmaForm");
- public static final UriRef hasMorphoFeature = new UriRef("http://fise.iks-project.eu/ontology/hasMorphologicalFeature");
+ private OnlineMode onlineMode;
private static List<String> supportedLangs = new Vector<String>();
static {
@@ -99,17 +104,14 @@ public class CeliLemmatizerEnhancementEn
public static final Literal LANG_ID_ENGINE_NAME = LiteralFactory.getInstance().createTypedLiteral("org.apache.stanbol.enhancer.engines.celi.langid.impl.CeliLanguageIdentifierEnhancementEngine");
/**
- * The default value for the Execution of this Engine. Currently set to
- * {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
+ * The default value for the Execution of this Engine. Currently set to {@link ServiceProperties#ORDERING_CONTENT_EXTRACTION}
*/
public static final Integer defaultOrder = ServiceProperties.ORDERING_CONTENT_EXTRACTION;
private Logger log = LoggerFactory.getLogger(getClass());
-
/**
- * This contains the only MIME type directly supported by this enhancement
- * engine.
+ * This contains the only MIME type directly supported by this enhancement engine.
*/
private static final String TEXT_PLAIN_MIMETYPE = "text/plain";
@@ -135,7 +137,7 @@ public class CeliLemmatizerEnhancementEn
protected void activate(ComponentContext ctx) throws IOException, ConfigurationException {
super.activate(ctx);
Dictionary<String, Object> properties = ctx.getProperties();
- this.licenseKey = Utils.getLicenseKey(properties,ctx.getBundleContext());
+ this.licenseKey = Utils.getLicenseKey(properties, ctx.getBundleContext());
String url = (String) properties.get(SERVICE_URL);
if (url == null || url.isEmpty()) {
throw new ConfigurationException(SERVICE_URL, String.format("%s : please configure the URL of the CELI Web Service (e.g. by" + "using the 'Configuration' tab of the Apache Felix Web Console).", getClass().getSimpleName()));
@@ -159,10 +161,8 @@ public class CeliLemmatizerEnhancementEn
@Override
public int canEnhance(ContentItem ci) throws EngineException {
String language = EnhancementEngineHelper.getLanguage(ci);
- if(language==null) {
- log.warn("Unable to enhance ContentItem {} because language of the Content is unknown." +
- "Please check that a language identification engine is active in this EnhancementChain).",
- ci.getUri());
+ if (language == null) {
+ log.warn("Unable to enhance ContentItem {} because language of the Content is unknown." + "Please check that a language identification engine is active in this EnhancementChain).", ci.getUri());
}
if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES) != null && this.isLangSupported(language))
return ENHANCE_ASYNC;
@@ -172,19 +172,15 @@ public class CeliLemmatizerEnhancementEn
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
- String language = EnhancementEngineHelper.getLanguage(ci);
- if (!isLangSupported(language)){
- throw new IllegalStateException("Call to computeEnhancement with unsupported language '"
- +language+" for ContentItem "+ ci.getUri() +": This is also checked "
- + "in the canEnhance method! -> This indicated an Bug in the "
- + "implementation of the " + "EnhancementJobManager!");
+ String language = EnhancementEngineHelper.getLanguage(ci);
+ if (!isLangSupported(language)) {
+ throw new IllegalStateException("Call to computeEnhancement with unsupported language '" + language + " for ContentItem " + ci.getUri() + ": This is also checked " + "in the canEnhance method! -> This indicated an Bug in the "
+ + "implementation of the " + "EnhancementJobManager!");
}
- Language lang = new Language(language); //clerezza language for PlainLiterals
+
Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
if (contentPart == null) {
- throw new IllegalStateException("No ContentPart with Mimetype '"
- + TEXT_PLAIN_MIMETYPE + "' found for ContentItem "
- + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+ throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This "
+ "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
}
String text;
@@ -198,72 +194,124 @@ public class CeliLemmatizerEnhancementEn
return;
}
- MGraph g = ci.getMetadata();
- LiteralFactory literalFactory = LiteralFactory.getInstance();
+ MGraph graph = ci.getMetadata();
if (this.completeMorphoAnalysis) {
- List<LexicalEntry> terms;
- try {
- terms = this.client.performMorfologicalAnalysis(text, language);
- } catch (IOException e) {
- throw new EngineException("Error while calling the CELI Lemmatizer"
- +" service (configured URL: "
- +serviceURL+")!",e);
- } catch (SOAPException e) {
- throw new EngineException("Error wile encoding/decoding the request/"
- +"response to the CELI lemmatizer service!",e);
- }
- //get a write lock before writing the enhancements
- ci.getLock().writeLock().lock();
- try {
- for (LexicalEntry le : terms) {
- if(!le.termReadings.isEmpty()){
- UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
- g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT,
- new PlainLiteralImpl(le.getWordForm(),lang)));
- if (le.from >= 0 && le.to > 0) {
- g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(le.from)));
- g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(le.to)));
- g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT,
- new PlainLiteralImpl(getSelectionContext(text, le.getWordForm(), le.from), lang)));
- }
- for (Reading r : le.termReadings) {
- g.add(new TripleImpl(textAnnotation, hasLemmaForm,
- new PlainLiteralImpl(r.getLemma(),lang)));
- for (Entry<String,String> entry : r.lexicalFeatures.entrySet()) {
- g.add(new TripleImpl(textAnnotation, hasMorphoFeature,
- literalFactory.createTypedLiteral(entry.getKey() + "=" + entry.getValue())));
- }
- }
- } //TODO: check if it is OK to ignore lexical entries with no readings
- }
- } finally {
- ci.getLock().writeLock().unlock();
- }
+ this.addMorphoAnalysisEnhancement(ci, text, language, graph);
} else {
- String lemmatizedContents;
- try {
- lemmatizedContents = this.client.lemmatizeContents(text, language);
- } catch (IOException e) {
- throw new EngineException("Error while calling the CELI Lemmatizer"
- +" service (configured URL: "
- +serviceURL+")!",e);
- } catch (SOAPException e) {
- throw new EngineException("Error wile encoding/decoding the request/"
- +"response to the CELI lemmatizer service!",e);
- }
- //get a write lock before writing the enhancements
- ci.getLock().writeLock().lock();
- try {
- UriRef textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
- g.add(new TripleImpl(textEnhancement, hasLemmaForm,
- new PlainLiteralImpl(lemmatizedContents,lang)));
- } finally {
- ci.getLock().writeLock().unlock();
- }
+ this.addLemmatizationEnhancement(ci, text, language, graph);
}
}
+ /**
+  * Sends the text to the CELI morphological analyzer and writes one text
+  * annotation per reading of every returned lexical entry to the graph.
+  *
+  * @param ci the content item the annotations are created for; its write lock is held while the graph is changed
+  * @param text the plain text that is analyzed
+  * @param language the language code of the text
+  * @param g the metadata graph that receives the triples
+  * @throws EngineException if the service call fails with an {@link IOException} or a {@link SOAPException}
+  */
+ private void addMorphoAnalysisEnhancement(ContentItem ci, String text, String language, MGraph g) throws EngineException {
+     // clerezza language used for every PlainLiteral created below
+     Language lang = new Language(language);
+     List<LexicalEntry> entries;
+     try {
+         entries = this.client.performMorfologicalAnalysis(text, language);
+     } catch (IOException e) {
+         throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
+     } catch (SOAPException e) {
+         throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
+     }
+     // the remote call happened without the lock; only the graph update is guarded
+     ci.getLock().writeLock().lock();
+     try {
+         LiteralFactory factory = LiteralFactory.getInstance();
+         for (LexicalEntry lexicalEntry : entries) {
+             // start/end/context are only written when both offsets satisfy this check
+             boolean hasPosition = lexicalEntry.from >= 0 && lexicalEntry.to > 0;
+             // each interpretation produced by the analyzer gets its own text annotation
+             for (CeliMorphoFeatures reading : this.convertLexicalEntryToMorphFeatures(lexicalEntry, language)) {
+                 UriRef annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+                 g.add(new TripleImpl(annotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(lexicalEntry.getWordForm(), lang)));
+                 if (hasPosition) {
+                     g.add(new TripleImpl(annotation, ENHANCER_START, factory.createTypedLiteral(lexicalEntry.from)));
+                     g.add(new TripleImpl(annotation, ENHANCER_END, factory.createTypedLiteral(lexicalEntry.to)));
+                     String context = getSelectionContext(text, lexicalEntry.getWordForm(), lexicalEntry.from);
+                     g.add(new TripleImpl(annotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(context, lang)));
+                 }
+                 g.addAll(reading.featuresAsTriples(annotation, lang));
+             }
+         }
+     } finally {
+         ci.getLock().writeLock().unlock();
+     }
+ }
+
+ /**
+  * Requests the lemmatized form of the whole text from the CELI service and
+  * stores it as {@link #hasLemmaForm} value of a single new text annotation.
+  *
+  * @param ci the content item the annotation is created for; its write lock is held while the graph is changed
+  * @param text the plain text that is lemmatized
+  * @param language the language code of the text
+  * @param g the metadata graph that receives the triple
+  * @throws EngineException if the service call fails with an {@link IOException} or a {@link SOAPException}
+  */
+ private void addLemmatizationEnhancement(ContentItem ci, String text, String language, MGraph g) throws EngineException {
+     // clerezza language used for the lemma literal
+     Language lang = new Language(language);
+     String lemmas;
+     try {
+         lemmas = this.client.lemmatizeContents(text, language);
+     } catch (IOException e) {
+         throw new EngineException("Error while calling the CELI Lemmatizer" + " service (configured URL: " + serviceURL + ")!", e);
+     } catch (SOAPException e) {
+         throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI lemmatizer service!", e);
+     }
+     // the remote call happened without the lock; only the graph update is guarded
+     ci.getLock().writeLock().lock();
+     try {
+         UriRef annotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
+         PlainLiteralImpl lemmaLiteral = new PlainLiteralImpl(lemmas, lang);
+         g.add(new TripleImpl(annotation, hasLemmaForm, lemmaLiteral));
+     } finally {
+         ci.getLock().writeLock().unlock();
+     }
+ }
+
+ /**
+  * Converts every reading of a lexical entry into a {@link CeliMorphoFeatures} instance.
+  * <p>
+  * The lemma of a reading is always kept. Every value of the lexical features
+  * "POS", "CASE", "GENDER", "NUMBER", "PERSON", "VERB_FORM"/"VFORM" and
+  * "TENSE"/"VERB_TENSE" is looked up in the tag set registered for the language;
+  * other feature names are ignored. A value that can not be mapped is logged
+  * and skipped, so a single unknown tag does not discard the whole reading.
+  *
+  * @param le the lexical entry returned by the CELI service
+  * @param lang the language code used to select the tag sets
+  * @return one element per reading, in the order of the readings; empty if the entry has no readings
+  */
+ private List<CeliMorphoFeatures> convertLexicalEntryToMorphFeatures(LexicalEntry le, String lang) {
+     CeliTagSetRegistry mappings = CeliTagSetRegistry.getInstance();
+     List<CeliMorphoFeatures> result = new Vector<CeliMorphoFeatures>();
+     // iterating an empty list is a no-op, so no separate isEmpty() check is needed
+     for (Reading r : le.termReadings) {
+         CeliMorphoFeatures morphoFeature = new CeliMorphoFeatures(r.getLemma());
+         for (Entry<String, List<String>> entry : r.lexicalFeatures.entrySet()) {
+             String feature = entry.getKey();
+             for (String value : entry.getValue()) {
+                 try {
+                     if ("POS".equals(feature)) {
+                         LexicalCategory pos = mappings.getPosTagset(lang).getTag(value).getCategory();
+                         if (pos != null) {
+                             morphoFeature.addPos(pos);
+                         }
+                     } else if ("CASE".equals(feature)) {
+                         Case c = mappings.getCaseTagset(lang).getTag(value).getCase();
+                         if (c != null) {
+                             morphoFeature.addCase(c);
+                         }
+                     } else if ("GENDER".equals(feature)) {
+                         Gender gen = mappings.getGenderTagset(lang).getTag(value).getGender();
+                         if (gen != null) {
+                             morphoFeature.addGender(gen);
+                         }
+                     } else if ("NUMBER".equals(feature)) {
+                         NumberFeature num = mappings.getNumberTagset(lang).getTag(value).getNumber();
+                         if (num != null) {
+                             morphoFeature.addNumber(num);
+                         }
+                     } else if ("PERSON".equals(feature)) {
+                         Person pers = mappings.getPersonTagset(lang).getTag(value).getPerson();
+                         if (pers != null) {
+                             morphoFeature.addPerson(pers);
+                         }
+                     } else if ("VERB_FORM".equals(feature) || "VFORM".equals(feature)) {
+                         VerbMood vForm = mappings.getVerbFormTagset(lang).getTag(value).getVerbForm();
+                         if (vForm != null) {
+                             morphoFeature.addVerbForm(vForm);
+                         }
+                     } else if ("TENSE".equals(feature) || "VERB_TENSE".equals(feature)) {
+                         TagSet<TenseTag> tenseTags = mappings.getTenseTagset(lang);
+                         TenseTag tenseTag = tenseTags.getTag(value);
+                         Tense tense = tenseTag.getTense();
+                         if (tense != null) {
+                             morphoFeature.addTense(tense);
+                         }
+                     }
+                 } catch (Exception e) {
+                     // Best effort: a failed lookup (presumably a missing tag set or an unknown
+                     // tag) must not abort the conversion. Report it through the engine logger,
+                     // including the cause, instead of printing to System.err.
+                     log.warn("Unable to map CELI morphological feature '" + feature + "' with value '" + value
+                             + "' for language '" + lang + "'; the value is ignored", e);
+                 }
+             }
+         }
+         result.add(morphoFeature);
+     }
+     return result;
+ }
+
private boolean isLangSupported(String language) {
if (supportedLangs.contains(language))
return true;
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java?rev=1399568&r1=1399567&r2=1399568&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/LemmatizerClientHTTP.java Thu Oct 18 09:51:37 2012
@@ -132,12 +132,18 @@ public class LemmatizerClientHTTP {
Element lemmaElm = (Element) lemmasList.item(j);
String lemma = lemmaElm.getTextContent();
NodeList features = ((Element)lemmaElm.getParentNode()).getElementsByTagNameNS("*","LexicalFeature");
- Hashtable<String,String> featuresMap=new Hashtable<String,String>();
+ Hashtable<String,List<String>> featuresMap=new Hashtable<String,List<String>>();
for(int k=0;features!=null && k<features.getLength();k++){
Element feat = (Element) features.item(k);
String name = feat.getAttribute("name");
String value = feat.getTextContent();
- featuresMap.put(name, value);
+ List<String> values=null;
+ if(featuresMap.containsKey(name))
+ values=featuresMap.get(name);
+ else
+ values=new Vector<String>();
+ values.add(value);
+ featuresMap.put(name, values);
}
Reading r=new Reading(lemma, featuresMap);
readings.add(r);
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java?rev=1399568&r1=1399567&r2=1399568&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/Reading.java Thu Oct 18 09:51:37 2012
@@ -17,13 +17,14 @@
package org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl;
import java.util.Hashtable;
+import java.util.List;
public class Reading {
String lemma;
- Hashtable<String,String> lexicalFeatures;
+ Hashtable<String,List<String>> lexicalFeatures;
- public Reading(String lemma, Hashtable<String, String> lexicalFeatures) {
+ public Reading(String lemma, Hashtable<String, List<String>> lexicalFeatures) {
super();
this.lemma = lemma;
this.lexicalFeatures = lexicalFeatures;
@@ -37,11 +38,11 @@ public class Reading {
this.lemma = lemma;
}
- public Hashtable<String, String> getLexicalFeatures() {
+ public Hashtable<String, List<String>> getLexicalFeatures() {
return lexicalFeatures;
}
- public void setLexicalFeatures(Hashtable<String, String> lexicalFeatures) {
+ public void setLexicalFeatures(Hashtable<String, List<String>> lexicalFeatures) {
this.lexicalFeatures = lexicalFeatures;
}
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java?rev=1399568&r1=1399567&r2=1399568&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/test/java/org/apache/stanbol/enhancer/engines/celi/lemmatizer/impl/CeliLemmatizerEnhancementEngineTest.java Thu Oct 18 09:51:37 2012
@@ -19,7 +19,6 @@ package org.apache.stanbol.enhancer.engi
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.MORPHOLOGICAL_ANALYSIS;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.SERVICE_URL;
import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.hasLemmaForm;
-import static org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine.hasMorphoFeature;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
@@ -52,8 +51,12 @@ import org.apache.clerezza.rdf.core.impl
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.engines.celi.CeliConstants;
+import org.apache.stanbol.enhancer.engines.celi.CeliMorphoFeatures;
import org.apache.stanbol.enhancer.engines.celi.testutils.MockComponentContext;
import org.apache.stanbol.enhancer.engines.celi.testutils.TestUtils;
+import org.apache.stanbol.enhancer.nlp.morpho.Gender;
+import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
+import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -69,12 +72,13 @@ import org.slf4j.LoggerFactory;
public class CeliLemmatizerEnhancementEngineTest {
- //static CeliLemmatizerEnhancementEngine morphoAnalysisEngine = new CeliLemmatizerEnhancementEngine();
+ static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
private static final Logger log = LoggerFactory.getLogger(CeliLemmatizerEnhancementEngine.class);
private static final String TEXT = "Torino è la principale città del Piemonte.";
+ private static final String TERM = "casa";
public CeliLemmatizerEnhancementEngine initEngine(boolean completeMorphoAnalysis) throws IOException, ConfigurationException {
Dictionary<String, Object> properties = new Hashtable<String, Object>();
@@ -132,8 +136,7 @@ public class CeliLemmatizerEnhancementEn
validateEnhancement(ci.getMetadata(), (UriRef)lemmaTextAnnotation, expectedValues);
//validate the lemma form TextAnnotation
int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation,"it");
- assertTrue("Only a single LemmaForm property is expected if '"+
- MORPHOLOGICAL_ANALYSIS+"=false'",lemmaForms == 1);
+ assertTrue("Only a single LemmaForm property is expected if '"+ MORPHOLOGICAL_ANALYSIS+"=false'",lemmaForms == 1);
shutdownEngine(morphoAnalysisEngine);
}
@@ -141,7 +144,7 @@ public class CeliLemmatizerEnhancementEn
@Test
public void testCompleteMorphoAnalysis() throws Exception {
- ContentItem ci = wrapAsContentItem(TEXT);
+ ContentItem ci = wrapAsContentItem(TERM);
//add a simple triple to statically define the language of the test
//content
ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
@@ -169,10 +172,9 @@ public class CeliLemmatizerEnhancementEn
while (textAnnotationIterator.hasNext()) {
UriRef textAnnotation = (UriRef) textAnnotationIterator.next().getSubject();
// test if selected Text is added
- validateTextAnnotation(ci.getMetadata(), textAnnotation,TEXT,expectedValues);
+ validateTextAnnotation(ci.getMetadata(), textAnnotation,TERM,expectedValues);
textAnnotationCount++;
//perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
- validateLemmaFormProperty(ci.getMetadata(), textAnnotation,"it");
validateMorphoFeatureProperty(ci.getMetadata(),textAnnotation);
}
log.info("{} TextAnnotations found and validated ...",textAnnotationCount);
@@ -198,8 +200,7 @@ public class CeliLemmatizerEnhancementEn
Resource lemmaForms = lemmaFormsIterator.next().getObject();
assertTrue("Lemma Forms value are expected of type PlainLiteral", lemmaForms instanceof PlainLiteral);
assertFalse("Lemma forms MUST NOT be empty",((PlainLiteral)lemmaForms).getLexicalForm().isEmpty());
- assertNotNull("Language of the Lemma Form literal MUST BE the same as for the parsed text",
- ((PlainLiteral)lemmaForms).getLanguage());
+ assertNotNull("Language of the Lemma Form literal MUST BE not null",((PlainLiteral)lemmaForms).getLanguage());
assertEquals("Language of the Lemma Form literal MUST BE the same as for the parsed text",
lang, ((PlainLiteral)lemmaForms).getLanguage().toString());
}
@@ -211,19 +212,55 @@ public class CeliLemmatizerEnhancementEn
* @param textAnnotation the TextAnnotation to check
*/
private void validateMorphoFeatureProperty(TripleCollection enhancements, NonLiteral textAnnotation) {
- Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, hasMorphoFeature, null);
- assertTrue("No Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ //This taste checks for known morpho features of a given input (constant TERM)
+ Iterator<Triple> morphoFeatureIterator = enhancements.filter(textAnnotation, RDF_TYPE, null);
+ assertTrue("No POS Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
while(morphoFeatureIterator.hasNext()){
Resource morphoFeature = morphoFeatureIterator.next().getObject();
- assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof TypedLiteral);
- String feature = ((Literal)morphoFeature).getLexicalForm();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
- assertTrue("{key}={value} encoding expected (value:"+feature+")",feature.indexOf('=')>0);
- String[] keyValue = feature.split("=");
- assertTrue("{key}={value} encoding expected(value:"+feature+")",
- keyValue.length == 2 && (!keyValue[0].isEmpty()) && (!keyValue[1].isEmpty()));
- assertEquals("DataType of the Morpho Feature MUST BE xsd:string (for now)",XSD.string,
- ((TypedLiteral)morphoFeature).getDataType());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ LexicalCategory cat=LexicalCategory.valueOf(key);
+ assertTrue("Part of Speech of "+TERM+" should be "+LexicalCategory.Noun , (cat==LexicalCategory.Noun));
+ }
+ }
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_GENDER, null);
+ assertTrue("No Gender Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
+ assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ Gender cat=Gender.valueOf(key);
+ assertTrue("Gender of "+TERM+" should be "+Gender.Feminine , (cat==Gender.Feminine));
+ }
+ }
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliMorphoFeatures.HAS_NUMBER, null);
+ assertTrue("No Number Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Morpho Feature value are expected of typed literal", morphoFeature instanceof UriRef);
+ String feature=((UriRef)morphoFeature).getUnicodeString();
+ assertFalse("Morpho Feature MUST NOT be empty",feature.isEmpty());
+ if(feature.startsWith(OLIA_NAMESPACE)){
+ String key=feature.substring(OLIA_NAMESPACE.length());
+ NumberFeature cat=NumberFeature.valueOf(key);
+ assertTrue("Number of "+TERM+" should be "+Gender.Feminine , (cat==NumberFeature.Singular));
+ }
+ }
+ morphoFeatureIterator = enhancements.filter(textAnnotation, CeliLemmatizerEnhancementEngine.hasLemmaForm, null);
+ assertTrue("No Number Morpho Feature value found for TextAnnotation "+textAnnotation+"!", morphoFeatureIterator.hasNext());
+ if(morphoFeatureIterator.hasNext()){
+ Resource morphoFeature = morphoFeatureIterator.next().getObject();
+ assertTrue("Lemma Forms value are expected of type PlainLiteral", morphoFeature instanceof PlainLiteral);
+ assertFalse("Lemma forms MUST NOT be empty",((PlainLiteral)morphoFeature).getLexicalForm().isEmpty());
+ String feature=((PlainLiteral)morphoFeature).getLexicalForm();
+ assertTrue("Lemma of "+TERM+" should be "+TERM , (feature.equals(TERM)));
}
+
}
}