You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/10/18 11:53:37 UTC
svn commit: r1399569 - in
/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src:
main/java/org/apache/stanbol/enhancer/nlp/morpho/
main/java/org/apache/stanbol/enhancer/nlp/pos/
test/java/org/apache/stanbol/enhancer/nlp/morpho/
Author: rwesten
Date: Thu Oct 18 09:53:35 2012
New Revision: 1399569
URL: http://svn.apache.org/viewvc?rev=1399569&view=rev
Log:
STANBOL-733: Applied the patch provided by Alessio; Changed implementation of the TenseEnum, because creating EnumSet in the constructor of an Enum is not supported by Java
Added:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/CaseTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/GenderTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberFeature.java
- copied, changed from r1393931, stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Number.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Person.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/TenseTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMood.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMoodTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/TenseEnumTest.java
Removed:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Number.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Case.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Tense.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Case.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Case.java?rev=1399569&r1=1399568&r2=1399569&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Case.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Case.java Thu Oct 18 09:53:35 2012
@@ -13,6 +13,7 @@ import org.apache.clerezza.rdf.core.UriR
* {@link #getParent()} and {@link #getTenses()}.
*/
public enum Case {
+
/**
* AbessiveCase expresses the lack or absence of the referent of the noun it marks. It has the meaning of
* the English preposition 'without' (Pei and Gaynor 1954: 3,35; Gove, et al. 1966: 3).
@@ -220,7 +221,7 @@ public enum Case {
* InstrumentalCase indicates that the referent of the noun it marks is the means of the accomplishment of
* the action expressed by the clause (http://purl.org/linguistics/gold/Instrumental)
*/
- nstrumental,
+ Instrumental,
/**
* InterablativeCase expresses that the referent of the noun it marks is the location from between which
* another referent is moving. It has the meaning 'from inbetween'.
@@ -451,6 +452,12 @@ public enum Case {
*/
Translative,
/**
+ * In many inflecting languages, there occur lexemes whose form does not change throughout the paradigm, e.g.,
+ * Russian papa "dad". For such forms, the category uninflected may be assigned. However, Uninflected is not to be confused with BaseForm
+ * that applies to forms in a paradigm where overt marking exists. Uninflected is a characteristic of lexemes, not individual tokens.
+ */
+ Uninflected,
+ /**
* Vocative case marks a noun whose referent is being addressed.
* (http://www.sil.org/linguistics/glossaryoflinguisticterms/WhatIsVocativeCase.htm 17.11.06)
*/
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/CaseTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/CaseTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/CaseTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/CaseTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,65 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Case tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class CaseTag extends Tag<CaseTag>{
+
+    private final Case caseCategory;
+    /**
+     * Creates a new Case tag for the parsed tag. The created Tag is not
+     * assigned to any {@link Case}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public CaseTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a CaseTag that is assigned to a {@link Case}
+     * @param tag the tag
+     * @param caseCat the grammatical case or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public CaseTag(String tag,Case caseCat){
+        super(tag);
+        this.caseCategory = caseCat;
+    }
+    /**
+     * The case of this tag (if known)
+     * @return the case or <code>null</code> if not mapped to any
+     */
+    public Case getCase(){
+        return this.caseCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("CASE %s (%s)", tag,
+            caseCategory == null ? "none" : caseCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof CaseTag && (caseCategory == null ?
+            ((CaseTag)obj).caseCategory == null : caseCategory.equals(((CaseTag)obj).caseCategory));
+    }
+
+}
\ No newline at end of file
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,63 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Definitness tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class DefinitnessTag extends Tag<DefinitnessTag>{
+    private final Definitness definitnessCategory;
+    /**
+     * Creates a new Definitness tag for the parsed tag. The created Tag is not
+     * assigned to any {@link Definitness}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public DefinitnessTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a DefinitnessTag that is assigned to a {@link Definitness}
+     * @param tag the tag
+     * @param numberCategory the {@link Definitness} or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public DefinitnessTag(String tag, Definitness numberCategory){
+        super(tag);
+        this.definitnessCategory = numberCategory;
+    }
+    /**
+     * The definitness of this tag (if known)
+     * @return the Definitness or <code>null</code> if not mapped to any
+     */
+    public Definitness getDefinitness(){
+        return this.definitnessCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("DEFINITNESS %s (%s)", tag,
+            definitnessCategory == null ? "none" : definitnessCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof DefinitnessTag && (definitnessCategory == null ?
+            ((DefinitnessTag)obj).definitnessCategory == null : definitnessCategory.equals(((DefinitnessTag)obj).definitnessCategory));
+    }
+}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/GenderTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/GenderTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/GenderTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/GenderTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,65 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Gender tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class GenderTag extends Tag<GenderTag>{
+
+    private final Gender genderCategory;
+    /**
+     * Creates a new Gender tag for the parsed tag. The created Tag is not
+     * assigned to any {@link Gender}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public GenderTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a GenderTag that is assigned to a {@link Gender}
+     * @param tag the tag
+     * @param genderCategory the lexical Gender or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public GenderTag(String tag,Gender genderCategory){
+        super(tag);
+        this.genderCategory = genderCategory;
+    }
+    /**
+     * The Gender of this tag (if known)
+     * @return the Gender or <code>null</code> if not mapped to any
+     */
+    public Gender getGender(){
+        return this.genderCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("GENDER %s (%s)", tag,
+            genderCategory == null ? "none" : genderCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof GenderTag && (genderCategory == null ?
+            ((GenderTag)obj).genderCategory == null : genderCategory.equals(((GenderTag)obj).genderCategory));
+    }
+
+}
Copied: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberFeature.java (from r1393931, stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Number.java)
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberFeature.java?p2=stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberFeature.java&p1=stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Number.java&r1=1393931&r2=1399569&rev=1399569&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Number.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberFeature.java Thu Oct 18 09:53:35 2012
@@ -2,10 +2,10 @@ package org.apache.stanbol.enhancer.nlp.
import org.apache.clerezza.rdf.core.UriRef;
-public enum Number {
+public enum NumberFeature {
/**
- * MULTEXT-East feature Number="count" (Nouns in Serbian, Macedonian, Bulgarian), e.g., Bulgarian яка/як,
- * язовира/язовир, яда/яд, юргана/юрган, юбилея/юбилей, ъгъла/ъгъл
+ * MULTEXT-East feature Number="count" (Nouns in Serbian, Macedonian, Bulgarian), e.g., Bulgarian ÃÂúð/ÃÂú,
+ * ÃÂ÷þòøÃâ¬Ã°/ÃÂ÷þòøÃâ¬, ÃÂôð/ÃÂô, ÃŽÃâ¬Ã³Ã°Ã½Ã°/ÃŽÃâ¬Ã³Ã°Ã½, ÎñøûõÃÂ/Îñøûõù, ÊóÊûð/ÊóÊû
*/
CountNumber,
/**
@@ -42,7 +42,7 @@ public enum Number {
static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
UriRef uri;
- Number() {
+ NumberFeature() {
uri = new UriRef(OLIA_NAMESPACE + name());
}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/NumberTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,64 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Number tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class NumberTag extends Tag<NumberTag>{
+    private final NumberFeature numberCategory;
+    /**
+     * Creates a new Number tag for the parsed tag. The created Tag is not
+     * assigned to any {@link NumberFeature}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public NumberTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a NumberTag that is assigned to a {@link NumberFeature}
+     * @param tag the tag
+     * @param numberCategory the lexical Number or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public NumberTag(String tag, NumberFeature numberCategory){
+        super(tag);
+        this.numberCategory = numberCategory;
+    }
+    /**
+     * Get the Number of this tag (if known)
+     * @return the NumberFeature or <code>null</code> if not mapped to any
+     */
+    public NumberFeature getNumber(){
+        return this.numberCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("NUMBER %s (%s)", tag,
+            numberCategory == null ? "none" : numberCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof NumberTag && (numberCategory == null ?
+            ((NumberTag)obj).numberCategory == null : numberCategory.equals(((NumberTag)obj).numberCategory));
+    }
+
+}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Person.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Person.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Person.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Person.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,47 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.clerezza.rdf.core.UriRef;
+
+/**
+ * Enumeration representing the different persons of words based on the
+ * <a href="http://purl.org/olia/olia.owl">OLIA</a> Ontology
+ */
+public enum Person {
+
+    /**
+     * Refers to the speaker and one or more nonparticipants, but not hearer(s). Contrasts with FirstPersonInclusive (Crystal 1997: 285).
+     * (http://purl.oclc.org/linguistics/gold/First)
+     */
+    First("FirstPerson"),
+    /**
+     * Refers to the person(s) the speaker is addressing (Crystal 1997: 285). (http://purl.oclc.org/linguistics/gold/Second)
+     */
+    Second("SecondPerson"),
+    /**
+     * Third person is deictic reference to a referent(s) not identified as the speaker or addressee. For example in English "he", "she",
+     * "they" or the third person singular verb suffix -s, e.g. in "He sometimes flies."
+     * (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsThirdPersonDeixis.htm 20.11.06)
+     */
+    Third("ThirdPerson");
+
+    static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
+    UriRef uri;
+
+    /** Uses the {@link #name()} of the constant as local name of the URI. */
+    Person() {
+        this(null);
+    }
+    /** @param name local name appended to the OLIA namespace; <code>null</code> falls back to {@link #name()} */
+    Person(String name) {
+        this.uri = new UriRef(OLIA_NAMESPACE.concat(name != null ? name : name()));
+    }
+    /** @return the URI created for this constant by the constructor */
+    public UriRef getUri() {
+        return this.uri;
+    }
+    /** @return the unicode string of the {@link #getUri() URI} */
+    @Override
+    public String toString() {
+        return getUri().getUnicodeString();
+    }
+}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,64 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Person tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class PersonTag extends Tag<PersonTag>{
+    private final Person personCategory;
+    /**
+     * Creates a new PersonTag for the parsed tag. The created Tag is not
+     * assigned to any {@link Person}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public PersonTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a PersonTag that is assigned to a {@link Person}
+     * @param tag the tag
+     * @param personCategory the lexical Person or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public PersonTag(String tag, Person personCategory){
+        super(tag);
+        this.personCategory = personCategory;
+    }
+    /**
+     * The person of this tag (if known)
+     * @return the person or <code>null</code> if not mapped to any
+     */
+    public Person getPerson(){
+        return this.personCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("PERSON %s (%s)", tag,
+            personCategory == null ? "none" : personCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof PersonTag && (personCategory == null ?
+            ((PersonTag)obj).personCategory == null : personCategory.equals(((PersonTag)obj).personCategory));
+    }
+
+}
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Tense.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Tense.java?rev=1399569&r1=1399568&r2=1399569&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Tense.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/Tense.java Thu Oct 18 09:53:35 2012
@@ -1,7 +1,9 @@
package org.apache.stanbol.enhancer.nlp.morpho;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.EnumSet;
+import java.util.Map;
import java.util.Set;
import org.apache.clerezza.rdf.core.UriRef;
@@ -50,7 +52,6 @@ public enum Tense {
;
static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
UriRef uri;
- Set<Tense> tenses;
Tense parent;
Tense() {
@@ -66,11 +67,6 @@ public enum Tense {
Tense(String name,Tense parent) {
uri = new UriRef(OLIA_NAMESPACE + (name == null ? name() : name));
this.parent = parent;
- EnumSet<Tense> tenses = EnumSet.of(this);
- if(parent != null){
- tenses.addAll(parent.tenses);
- }
- this.tenses = Collections.unmodifiableSet(tenses);
}
/**
* Getter for the parent tense (e.g.
@@ -98,7 +94,7 @@ public enum Tense {
* tenses.
*/
public Set<Tense> getTenses() {
- return tenses;
+ return transitiveClosureMap.get(this);
}
public UriRef getUri() {
@@ -109,4 +105,27 @@ public enum Tense {
public String toString() {
return uri.getUnicodeString();
}
+
+ /**
+ * This is needed because one can not create EnumSet instances before the
+ * initialization of an Enum has finished.<p>
+ * To keep using the much faster {@link EnumSet} a static member initialised
+ * in an static {} block is used as a workaround. The {@link Tense#getTenses()}
+ * method does use this static member instead of a member variable
+ */
+ private static final Map<Tense,Set<Tense>> transitiveClosureMap;
+
+    static {
+        transitiveClosureMap = new EnumMap<Tense,Set<Tense>>(Tense.class);
+        for(Tense tense : Tense.values()){
+            // walk the whole parent chain so that the result does not depend
+            // on the declaration order of the enum constants
+            Set<Tense> tenses = EnumSet.of(tense);
+            for(Tense parent = tense.getParent(); parent != null; parent = parent.getParent()){
+                tenses.add(parent);
+            }
+            // read-only: the same Set instance is handed out by every getTenses() call
+            transitiveClosureMap.put(tense, Collections.unmodifiableSet(tenses));
+        }
+    }
}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/TenseTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/TenseTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/TenseTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/TenseTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,63 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A Tense tag typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token} <p>
+ * @author Alessio Bosca
+ */
+public class TenseTag extends Tag<TenseTag>{
+    private final Tense tenseCategory;
+    /**
+     * Creates a new TenseTag for the parsed tag. The created Tag is not
+     * assigned to any {@link Tense}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public TenseTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a TenseTag that is assigned to a {@link Tense}
+     * @param tag the tag
+     * @param tenseCategory the lexical Tense or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public TenseTag(String tag, Tense tenseCategory){
+        super(tag);
+        this.tenseCategory = tenseCategory;
+    }
+    /**
+     * The tense of this tag (if known)
+     * @return the Tense or <code>null</code> if not mapped to any
+     */
+    public Tense getTense(){
+        return this.tenseCategory;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("TENSE %s (%s)", tag,
+            tenseCategory == null ? "none" : tenseCategory.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // '&&' binds tighter than '||', so the category check has to be grouped explicitly
+        return super.equals(obj) && obj instanceof TenseTag && (tenseCategory == null ?
+            ((TenseTag)obj).tenseCategory == null : tenseCategory.equals(((TenseTag)obj).tenseCategory));
+    }
+}
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMood.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMood.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMood.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMood.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,80 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+/**
+ * Enumeration representing the different verbal moods based on the
+ * <a href="http://purl.org/olia/olia.owl">OLIA</a> Ontology
+ */
+public enum VerbMood {
+
+    /**
+     * An infinitive is the base form of a verb. It is unmarked for inflectional categories such as the following: Aspect, Modality, Number, Person and Tense.
+     * (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsAnInfinitive.htm 19.09.06)
+     */
+    Infinitive,
+    /**
+     * A verbal noun is a noun formed directly as an inflexion of a verb or a verb stem, sharing at least in part its constructions.
+     * This term is applied especially to gerunds, and sometimes also to infinitives and supines. (http://en.wikipedia.org/wiki/Verbal_noun 19.09.06)
+     */
+    VerbalNoun,
+    /**
+     * Supine is a nonfinite form of motion verbs with functions similar to that of an infinitive (Angelika Adams)
+     */
+    Supine,
+    /**
+     * A participle is a lexical item, derived from a verb that has some of the characteristics and functions of both verbs and adjectives.
+     * In English, participles may be used as adjectives, and in non-finite forms of verbs. (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsAParticiple.htm 19.09.06)
+     */
+    Participle,
+    /**
+     * A gerund is a kind of verbal noun that exists in some languages. In today's English, gerunds are nouns built from a verb with an '-ing' suffix.
+     * They can be used as the subject of a sentence, an object, or an object of preposition. They can also be used to complement a subject.
+     * Often, gerunds exist side-by-side with nouns that come from the same root but the gerund and the common noun have different shades of meaning.
+     * (http://en.wikipedia.org/wiki/Gerund, http://en.wikibooks.org/wiki/English:Gerund 19.09.06) property for a non-finite form of a verb other than the infinitive. (http://www.isocat.org/datcat/DC-2243)
+     */
+    Gerund,
+    /**
+     * A subjunctive verb is typically used to express wishes, commands (in subordinate clauses), emotion, possibility,
+     * judgment, necessity, and statements that are contrary to fact at present. (http://en.wikipedia.org/wiki/Subjunctive_mood 19.09.06)
+     */
+    SubjunctiveVerb,
+    /**
+     * A conditional verb is a verb form in many languages. It is used to express degrees of certainty or uncertainty and hypothesis
+     * about past, present, or future. Such forms often occur in conditional sentences. (http://en.wikipedia.org/wiki/Conditional_mood 19.09.06)
+     */
+    ConditionalVerb,
+    /**
+     * An imperative verb is used to express commands, direct requests, and prohibitions. Often, direct use of the imperative mood may appear
+     * blunt or even rude, so it is often used with care. Example: "Paul, read that book". (http://en.wikipedia.org/wiki/Grammatical_mood#Imperative_mood 19.09.06)
+     */
+    ImperativeVerb,
+    /**
+     * Indicative mood is used in factual statements. All intentions in speaking that a particular language does not put into another mood
+     * use the indicative. It is the most commonly used mood and is found in all languages. (http://en.wikipedia.org/wiki/Grammatical_mood#Indicative_mood 19.09.06)
+     */
+    IndicativeVerb,
+    ;
+    static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
+    UriRef uri;
+    VerbMood() {
+        this(null);
+    }
+    /** NOTE(review): a non-null name gets "Verb Form" (with the space) appended; no constant passes one - confirm the intended URI */
+    VerbMood(String name) {
+        this.uri = new UriRef(OLIA_NAMESPACE.concat(name != null ? name + "Verb Form" : name()));
+    }
+    /** @return the URI created for this constant by the constructor */
+    public UriRef getUri() {
+        return this.uri;
+    }
+    /** @return the unicode string of the {@link #getUri() URI} */
+    @Override
+    public String toString() {
+        return getUri().getUnicodeString();
+    }
+}
+
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMoodTag.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMoodTag.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMoodTag.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/VerbMoodTag.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,66 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.tag.Tag;
+import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
+import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
+
+/**
+ * A VerbMood tag, typically assigned by a Morphological Analyzer (an
+ * NLP component) to a {@link Token}. <p>
+ * @author Alessio Bosca
+ */
+public class VerbMoodTag extends Tag<VerbMoodTag>{
+
+    private final VerbMood verbMood;
+    /**
+     * Creates a new VerbMoodTag for the parsed tag. The created Tag is not
+     * assigned to any {@link VerbMood}.<p> This constructor can be used
+     * by {@link EnhancementEngine}s that encounter a Tag they do not know
+     * (e.g. that is not defined by the configured {@link TagSet}).<p>
+     * @param tag the Tag
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public VerbMoodTag(String tag){
+        this(tag,null);
+    }
+    /**
+     * Creates a VerbMoodTag that is assigned to a {@link VerbMood}
+     * @param tag the tag
+     * @param verbMood the lexical VerbMood or <code>null</code> if not known
+     * @throws IllegalArgumentException if the parsed tag is <code>null</code>
+     * or empty.
+     */
+    public VerbMoodTag(String tag, VerbMood verbMood){
+        super(tag);
+        this.verbMood = verbMood;
+    }
+    /**
+     * The verbMood of this tag (if known)
+     * @return the VerbMood or <code>null</code> if not mapped to any
+     */
+    public VerbMood getVerbForm(){
+        return this.verbMood;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("VERB FORM %s (%s)", tag,
+            verbMood == null ? "none" : verbMood.name());
+    }
+
+    @Override
+    public int hashCode() {
+        return tag.hashCode();
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        // Keep the mood comparison inside the '&&' chain: an '||' alternative would bypass super.equals(..) and the instanceof guard
+        return super.equals(obj) && obj instanceof VerbMoodTag // moods are enum constants, so '==' is a null-safe comparison
+            && verbMood == ((VerbMoodTag)obj).verbMood;
+    }
+
+}
+
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java?rev=1399569&r1=1399568&r2=1399569&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/pos/LexicalCategory.java Thu Oct 18 09:53:35 2012
@@ -88,7 +88,21 @@ public enum LexicalCategory {
* membership, such as negative particle, which are `unassigned' to any of the standard part-of-speech
* categories. (http://www.ilc.cnr.it/EAGLES96/annotate/node16.html#mp 19.09.06)
*/
- Unique, ;
+ Unique,
+ /**
+ * A numeral is a word, functioning most typically as an adjective or pronoun, that expresses a number,
+ * and relation to the number, such as one of the following: Quantity, Sequence, Frequency, Fraction.
+ * (http://www.sil.org/linguistics/GlossaryOfLinguisticTerms/WhatIsANumeral.htm 19.09.06)
+ */
+ Numeral,
+ /**
+ * Clitic Element covers only one aspect of the original MULTEXT-East (and ISOcat) definitions of cliticness, i.e., that an element is a clitic
+ */
+ Clitic,
+ /**
+ * Proper nouns (also called proper names) are the names of unique entities. (http://en.wikipedia.org/wiki/Noun 19.09.06)
+ */
+ ProperNoun,;
static final String OLIA_NAMESPACE = "http://purl.org/olia/olia.owl#";
UriRef uri;
Added: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/TenseEnumTest.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/TenseEnumTest.java?rev=1399569&view=auto
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/TenseEnumTest.java (added)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/morpho/TenseEnumTest.java Thu Oct 18 09:53:35 2012
@@ -0,0 +1,35 @@
+package org.apache.stanbol.enhancer.nlp.morpho;
+
+import java.util.Set;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+/**
+ * Regression test for the initialization of the {@link Tense} enum, which
+ * caused problems before; hence this rather simple looking test.
+ * @author Rupert Westenthaler
+ *
+ */
+public class TenseEnumTest {
+
+    /**
+     * The transitive closure can not be initialized in the
+     * constructor of the Tense, so this checks that every
+     * tense and each of its ancestors is contained in the
+     * set returned by {@link Tense#getTenses()}.
+     */
+    @Test
+    public void testTransitiveClosure(){
+        for(Tense tense : Tense.values()){
+            Set<Tense> closure = tense.getTenses();
+            // walk up the parent chain, starting
+            // with the tense itself
+            for(Tense ancestor = tense; ancestor != null; ancestor = ancestor.getParent()){
+                Assert.assertTrue(closure.contains(ancestor));
+            }
+        }
+    }
+
+}