You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/11/21 15:24:18 UTC
svn commit: r1412121 - in /stanbol/branches/stanbol-nlp-processing/enhancer:
engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/
engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/
engi...
Author: rwesten
Date: Wed Nov 21 14:24:16 2012
New Revision: 1412121
URL: http://svn.apache.org/viewvc?rev=1412121&view=rev
Log:
STANBOL-734: Some API simplifications in the MorphoFeatures class; STANBOL-733: moved implementation of the AnalysedText createAnalysedText(ContentItem ci, Blob blob) method to the abstract AnalysedTextFactory class as it was generic and not implementation specific; STANBOL-740: Minor adaptation to the default configuration of the KeywordLinkingEngine, all static final constants should now be public; STANBOL-809: Accidentally implemented this issue in this branch (instead of the trunk) so I will merge it from the branch to the trunk.
Removed:
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/DefinitnessTag.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/PersonTag.java
Modified:
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/TextProcessingConfig.java
stanbol/branches/stanbol-nlp-processing/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextFactory.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextUtils.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java
stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/MorphoFeatures.java
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliMorphoFeatures.java Wed Nov 21 14:24:16 2012
@@ -2,53 +2,31 @@ package org.apache.stanbol.enhancer.engi
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.Collections;
-import java.util.EnumSet;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
-import java.util.Set;
import java.util.Vector;
import org.apache.clerezza.rdf.core.Language;
-import org.apache.clerezza.rdf.core.LiteralFactory;
-import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
-import org.apache.commons.io.IOUtils;
import org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliLemmatizerEnhancementEngine;
import org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.Reading;
import org.apache.stanbol.enhancer.nlp.model.Token;
-import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
-import org.apache.stanbol.enhancer.nlp.model.tag.TagSet;
import org.apache.stanbol.enhancer.nlp.morpho.Case;
import org.apache.stanbol.enhancer.nlp.morpho.CaseTag;
import org.apache.stanbol.enhancer.nlp.morpho.Definitness;
-import org.apache.stanbol.enhancer.nlp.morpho.DefinitnessTag;
-import org.apache.stanbol.enhancer.nlp.morpho.Gender;
import org.apache.stanbol.enhancer.nlp.morpho.GenderTag;
import org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures;
-import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
import org.apache.stanbol.enhancer.nlp.morpho.NumberTag;
import org.apache.stanbol.enhancer.nlp.morpho.Person;
-import org.apache.stanbol.enhancer.nlp.morpho.PersonTag;
import org.apache.stanbol.enhancer.nlp.morpho.Tense;
import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
-import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
import org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag;
import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* Represents a morphological interpretation of a {@link Token word}. Words might have different interpretations (typically depending on the POS) so this Tag allows to add information about all possible interpretations to a single word. This is
@@ -91,9 +69,9 @@ public class CeliMorphoFeatures extends
} else if (feature.equals("GENDER")) {
morphoFeature.addGender(tagRegistry.getGenderTag(lang,value));
} else if (feature.equals("NUMBER")) {
- morphoFeature.addNumber(tagRegistry.getNumberTag(lang,value));
+ morphoFeature.addNumber(tagRegistry.getNumber(lang,value));
} else if (feature.equals("PERSON")) {
- morphoFeature.addPerson(tagRegistry.getPersonTag(lang,value));
+ morphoFeature.addPerson(tagRegistry.getPerson(lang,value));
} else if (feature.equals("VERB_FORM") || feature.equals("VFORM")) {
morphoFeature.addVerbForm(tagRegistry.getVerbMoodTag(lang,value));
} else if (feature.equals("TENSE") || feature.equals("VERB_TENSE")) {
@@ -127,20 +105,16 @@ public class CeliMorphoFeatures extends
result.add(new TripleImpl(textAnnotation, HAS_NUMBER, num.getNumber().getUri()));
}
}
- for(PersonTag pers : getPersonList()){
- if(pers.getPerson() != null){
- result.add(new TripleImpl(textAnnotation, HAS_PERSON, pers.getPerson().getUri()));
- }
+ for(Person pers : getPersonList()){
+ result.add(new TripleImpl(textAnnotation, HAS_PERSON, pers.getUri()));
}
for(GenderTag gender : getGenderList()){
if(gender.getGender() != null){
result.add(new TripleImpl(textAnnotation, HAS_GENDER, gender.getGender().getUri()));
}
}
- for(DefinitnessTag def : getDefinitnessList()){
- if(def.getDefinitness() != null){
- result.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, def.getDefinitness().getUri()));
- }
+ for(Definitness def : getDefinitnessList()){
+ result.add(new TripleImpl(textAnnotation, HAS_DEFINITENESS, def.getUri()));
}
for(CaseTag caseFeat : getCaseList()){
if(caseFeat.getCase() != null){
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/CeliTagSetRegistry.java Wed Nov 21 14:24:16 2012
@@ -1,6 +1,7 @@
package org.apache.stanbol.enhancer.engines.celi;
import java.lang.reflect.InvocationTargetException;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -9,13 +10,11 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.morpho.Case;
import org.apache.stanbol.enhancer.nlp.morpho.CaseTag;
import org.apache.stanbol.enhancer.nlp.morpho.Definitness;
-import org.apache.stanbol.enhancer.nlp.morpho.DefinitnessTag;
import org.apache.stanbol.enhancer.nlp.morpho.Gender;
import org.apache.stanbol.enhancer.nlp.morpho.GenderTag;
import org.apache.stanbol.enhancer.nlp.morpho.NumberFeature;
import org.apache.stanbol.enhancer.nlp.morpho.NumberTag;
import org.apache.stanbol.enhancer.nlp.morpho.Person;
-import org.apache.stanbol.enhancer.nlp.morpho.PersonTag;
import org.apache.stanbol.enhancer.nlp.morpho.Tense;
import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
import org.apache.stanbol.enhancer.nlp.morpho.VerbMood;
@@ -51,14 +50,12 @@ public final class CeliTagSetRegistry {
private final Map<String,TagSet<NumberTag>> numberMappingsByLanguage = new HashMap<String,TagSet<NumberTag>>();
private final Map<String,Map<String,NumberTag>> unmappedNumberTagsByLanguage = new HashMap<String,Map<String,NumberTag>>();
- private final Map<String,TagSet<PersonTag>> personMappingsByLanguage = new HashMap<String,TagSet<PersonTag>>();
- private final Map<String,Map<String,PersonTag>> unmappedPersonTagsByLanguage = new HashMap<String,Map<String,PersonTag>>();
+ private final Map<String,Map<String,Person>> personMappingsByLanguage = new HashMap<String,Map<String,Person>>();
private final Map<String,TagSet<CaseTag>> caseMappingsByLanguage = new HashMap<String,TagSet<CaseTag>>();
private final Map<String,Map<String,CaseTag>> unmappedCaseTagsByLanguage = new HashMap<String,Map<String,CaseTag>>();
- private final Map<String,TagSet<DefinitnessTag>> definitenessMappingsByLanguage = new HashMap<String,TagSet<DefinitnessTag>>();
- private final Map<String,Map<String,DefinitnessTag>> unmappedDefinitnessTagsByLanguage = new HashMap<String,Map<String,DefinitnessTag>>();
+ private final Map<String,Map<String,Definitness>> definitenessMappingsByLanguage = new HashMap<String,Map<String,Definitness>>();
private final Map<String,TagSet<VerbMoodTag>> verbFormMappingsByLanguage = new HashMap<String,TagSet<VerbMoodTag>>();
private final Map<String,Map<String,VerbMoodTag>> unmappedVerbMoodTagsByLanguage = new HashMap<String,Map<String,VerbMoodTag>>();
@@ -156,7 +153,7 @@ public final class CeliTagSetRegistry {
* the {@link String} tag as returned by CELI
* @return the {@link NumberTag}
*/
- public NumberTag getNumberTag(String language, String tag) {
+ public NumberTag getNumber(String language, String tag) {
return getTag(numberMappingsByLanguage, unmappedNumberTagsByLanguage, NumberTag.class, language, tag);
}
@@ -166,8 +163,8 @@ public final class CeliTagSetRegistry {
* @param mappings
* expressed with a {@link TagSet}
*/
- private void addPersonTagset(TagSet<PersonTag> model) {
- for (String lang : model.getLanguages()) {
+ private void addPersonMappings(Map<String,Person> model, String...langs) {
+ for (String lang : langs) {
if (personMappingsByLanguage.put(lang, model) != null) {
throw new IllegalStateException("Multiple Models for Language '" + lang
+ "'! This is an error in the static confituration of "
@@ -186,8 +183,9 @@ public final class CeliTagSetRegistry {
* the {@link String} tag as returned by CELI
* @return the {@link PersonTag}
*/
- public PersonTag getPersonTag(String language, String tag) {
- return getTag(personMappingsByLanguage, unmappedPersonTagsByLanguage, PersonTag.class, language, tag);
+ public Person getPerson(String language, String tag) {
+ Map<String,Person> langMappings = personMappingsByLanguage.get(language);
+ return langMappings == null ? null : langMappings.get(tag);
}
/**
@@ -226,8 +224,8 @@ public final class CeliTagSetRegistry {
* @param mappings
* expressed with a {@link TagSet}
*/
- private void addDefinitnessTagset(TagSet<DefinitnessTag> model) {
- for (String lang : model.getLanguages()) {
+ private void addDefinitnessTagset(Map<String,Definitness> model,String...langs) {
+ for (String lang : langs) {
if (definitenessMappingsByLanguage.put(lang, model) != null) {
throw new IllegalStateException("Multiple Models for Language '" + lang
+ "'! This is an error in the static confituration of "
@@ -246,9 +244,9 @@ public final class CeliTagSetRegistry {
* the {@link String} tag as returned by CELI
* @return the {@link DefinitnessTag}
*/
- public DefinitnessTag getDefinitnessTag(String language, String tag) {
- return getTag(definitenessMappingsByLanguage, unmappedDefinitnessTagsByLanguage,
- DefinitnessTag.class, language, tag);
+ public Definitness getDefinitnessTag(String language, String tag) {
+ Map<String,Definitness> langMappings = definitenessMappingsByLanguage.get(language);
+ return langMappings == null ? null : langMappings.get(tag);
}
/**
@@ -514,13 +512,15 @@ public final class CeliTagSetRegistry {
getInstance().addNumberTagset(NUMBER);
}
- public static final TagSet<PersonTag> PERSON = new TagSet<PersonTag>("CELI PERSON tags", "da", "de",
- "it", "ro", "ru");
+ //add the person models
static {
- PERSON.addTag(new PersonTag("FIRST", Person.First));
- PERSON.addTag(new PersonTag("SECOND", Person.Second));
- PERSON.addTag(new PersonTag("THIRD", Person.Third));
- getInstance().addPersonTagset(PERSON);
+ Map<String,Person> model = new HashMap<String,Person>();
+ model.put("FIRST", Person.First);
+ model.put("SECOND", Person.Second);
+ model.put("THIRD", Person.Third);
+ getInstance().addPersonMappings(
+ Collections.unmodifiableMap(model),
+ "da", "de","it", "ro", "ru");
}
public static final TagSet<CaseTag> CASE = new TagSet<CaseTag>("CELI CASE tags", "da", "de", "it", "ro",
@@ -542,12 +542,14 @@ public final class CeliTagSetRegistry {
getInstance().addCaseTagset(CASE);
}
- public static final TagSet<DefinitnessTag> DEFINITNESS = new TagSet<DefinitnessTag>(
- "CELI DEFINITNESS tags", "da", "de", "it", "ro", "ru");
+ //definitness models
static {
- DEFINITNESS.addTag(new DefinitnessTag("DEF", Definitness.Definite));
- DEFINITNESS.addTag(new DefinitnessTag("INDEF", Definitness.Indefinite));
- getInstance().addDefinitnessTagset(DEFINITNESS);
+ Map<String,Definitness> model = new HashMap<String,Definitness>();
+ model.put("DEF", Definitness.Definite);
+ model.put("INDEF", Definitness.Indefinite);
+ getInstance().addDefinitnessTagset(
+ Collections.unmodifiableMap(model),
+ "da", "de", "it", "ro", "ru");
}
public static final TagSet<VerbMoodTag> VERB_FORM = new TagSet<VerbMoodTag>("CELI VERB FORM tags", "da",
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/engine/KeywordLinkingEngine.java Wed Nov 21 14:24:16 2012
@@ -138,7 +138,7 @@ import org.slf4j.LoggerFactory;
boolValue=KeywordLinkingEngine.DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
@Property(name=KeywordLinkingEngine.PROCESSED_LANGUAGES,
cardinality=Integer.MAX_VALUE,
- value={"*;lmmtip;uc=LINK", // link multiple matchable tokens in chunks; link upper case words
+ value={"*;lmmtip;uc=LINK;prop=0.75;pprob=0.75", // link multiple matchable tokens in chunks; link upper case words
"de;uc=MATCH", //in German all Nouns are upper case
"es;lc=Noun", //the OpenNLP POS tagger for Spanish does not support ProperNouns
"nl;lc=Noun"}), //same for Dutch
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/TextProcessingConfig.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/TextProcessingConfig.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/TextProcessingConfig.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/keywordextraction/src/main/java/org/apache/stanbol/enhancer/engines/keywordextraction/linking/TextProcessingConfig.java Wed Nov 21 14:24:16 2012
@@ -91,7 +91,7 @@ public class TextProcessingConfig implem
* By default linking of chunks with multiple matchable tokens is enabled.
* This is useful to link Entities represented by two common nouns.
*/
- private static final boolean DEFAULT_LINK_MULTIPLE_MATCHABLE_TOKENS_IN_CHUNKS_STATE = true;
+ public static final boolean DEFAULT_LINK_MULTIPLE_MATCHABLE_TOKENS_IN_CHUNKS_STATE = true;
/**
* The set of {@link PosTag#getCategory()} considered for EntityLinking
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/engines/tika/src/main/java/org/apache/stanbol/enhancer/engines/tika/TikaEngine.java Wed Nov 21 14:24:16 2012
@@ -139,6 +139,7 @@ public class TikaEngine
private ContentItemFactory ciFactory;
private static class MediaTypeAndStream {
+ String uri;
MediaType mediaType;
InputStream in;
}
@@ -265,10 +266,13 @@ public class TikaEngine
private MediaTypeAndStream extractMediaType(ContentItem ci) {
MediaTypeAndStream mtas = new MediaTypeAndStream();
mtas.mediaType = getMediaType(ci.getBlob());
+ mtas.uri = ci.getUri().getUnicodeString();
if(mtas.mediaType == null || mtas.mediaType.equals(MediaType.OCTET_STREAM)){
mtas.in = new BufferedInputStream(ci.getStream());
+ Metadata m = new Metadata();
+ m.add(Metadata.RESOURCE_NAME_KEY, mtas.uri);
try {
- mtas.mediaType = detector.detect(mtas.in, new Metadata());
+ mtas.mediaType = detector.detect(mtas.in, m);
} catch (IOException e) {
log.warn("Exception while detection the MediaType of the" +
"parsed ContentItem "+ci.getUri(),e);
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java Wed Nov 21 14:24:16 2012
@@ -4,13 +4,7 @@ import org.apache.stanbol.enhancer.nlp.m
import org.apache.stanbol.enhancer.nlp.model.Chunk;
import org.apache.stanbol.enhancer.nlp.model.Token;
import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
-import org.apache.stanbol.enhancer.nlp.morpho.CaseTag;
-import org.apache.stanbol.enhancer.nlp.morpho.GenderTag;
import org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures;
-import org.apache.stanbol.enhancer.nlp.morpho.NumberTag;
-import org.apache.stanbol.enhancer.nlp.morpho.PersonTag;
-import org.apache.stanbol.enhancer.nlp.morpho.TenseTag;
-import org.apache.stanbol.enhancer.nlp.morpho.VerbMoodTag;
import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
import org.apache.stanbol.enhancer.nlp.pos.PosTag;
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextFactory.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextFactory.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextFactory.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextFactory.java Wed Nov 21 14:24:16 2012
@@ -6,7 +6,17 @@ import org.apache.clerezza.rdf.core.UriR
import org.apache.stanbol.enhancer.nlp.model.impl.AnalysedTextFactoryImpl;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
+/**
+ * Abstract implementation of the {@link AnalysedTextFactory} that
+ * provides the implementation of the {@link #createAnalysedText(ContentItem, Blob)}
+ * based on the {@link #createAnalysedText(Blob)} method.
+ * <p>
+ * The {@link #getDefaultInstance()} methods returns the in-memory implementation
+ * of the AnalyzedText domain model and should only be used outside of an
+ * OSGI Service as implementation are also registered as OSGI services.
+ */
public abstract class AnalysedTextFactory {
private static AnalysedTextFactory defaultInstance = new AnalysedTextFactoryImpl();
@@ -29,7 +39,34 @@ public abstract class AnalysedTextFactor
* ContentItem.
* @throws IOException on any error while reading data from the parsed blob
*/
- public abstract AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException ;
+ public final AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException {
+ ci.getLock().readLock().lock();
+ try {
+ AnalysedText existing = ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class);
+ throw new IllegalStateException("The AnalysedText ContentPart already exists (impl: "
+ +existing.getClass().getSimpleName()+"| blob: "+existing.getBlob().getMimeType()+")");
+ }catch (NoSuchPartException e) {
+ //this is the expected case
+ }catch (ClassCastException e) {
+ throw new IllegalStateException("A ContentPart with the URI '"
+ + AnalysedText.ANALYSED_TEXT_URI+"' already exists but the parts "
+ + "type is not compatible with "+AnalysedText.class.getSimpleName()+"!",
+ e);
+ } finally {
+ ci.getLock().readLock().unlock();
+ }
+ //create the Analysed text
+ AnalysedText at = createAnalysedText(blob);
+ ci.getLock().writeLock().lock();
+ try {
+ //NOTE: there is a possibility that an other thread has added
+ // the contentpart
+ ci.addPart(AnalysedText.ANALYSED_TEXT_URI, at);
+ } finally {
+ ci.getLock().writeLock().unlock();
+ }
+ return at;
+ }
/**
* Creates a AnalysedText instance for the parsed blob.<p>
* NOTE: This implementation does NOT register the {@link AnalysedText}
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextUtils.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextUtils.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextUtils.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextUtils.java Wed Nov 21 14:24:16 2012
@@ -41,7 +41,7 @@ public class AnalysedTextUtils {
ci.getLock().readLock().lock();
try {
return ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class);
- }catch (NoSuchPartException e) {
+ } catch (NoSuchPartException e) {
return null;
} finally {
ci.getLock().readLock().unlock();
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/AnalysedTextFactoryImpl.java Wed Nov 21 14:24:16 2012
@@ -9,8 +9,6 @@ import org.apache.felix.scr.annotations.
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.servicesapi.Blob;
-import org.apache.stanbol.enhancer.servicesapi.ContentItem;
-import org.apache.stanbol.enhancer.servicesapi.NoSuchPartException;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.osgi.framework.Constants;
@@ -22,36 +20,6 @@ import org.osgi.framework.Constants;
public class AnalysedTextFactoryImpl extends AnalysedTextFactory {
@Override
- public AnalysedText createAnalysedText(ContentItem ci, Blob blob) throws IOException {
- ci.getLock().readLock().lock();
- try {
- AnalysedText existing = ci.getPart(AnalysedText.ANALYSED_TEXT_URI, AnalysedText.class);
- throw new IllegalStateException("The AnalysedText ContentPart already exists (impl: "
- +existing.getClass().getSimpleName()+"| blob: "+existing.getBlob().getMimeType()+")");
- }catch (NoSuchPartException e) {
- //this is the expected case
- }catch (ClassCastException e) {
- throw new IllegalStateException("A ContentPart with the URI '"
- + AnalysedText.ANALYSED_TEXT_URI+"' already exists but the parts "
- + "type is not compatible with "+AnalysedText.class.getSimpleName()+"!",
- e);
- } finally {
- ci.getLock().readLock().unlock();
- }
- //create the Analysed text
- AnalysedText at = createAnalysedText(blob);
- ci.getLock().writeLock().lock();
- try {
- //NOTE: there is a possibility that an other thread has added
- // the contentpart
- ci.addPart(AnalysedText.ANALYSED_TEXT_URI, at);
- } finally {
- ci.getLock().writeLock().unlock();
- }
- return at;
- }
-
- @Override
public AnalysedText createAnalysedText(Blob blob) throws IOException {
String text = ContentItemHelper.getText(blob);
return new AnalysedTextImpl(blob,text);
Modified: stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/MorphoFeatures.java
URL: http://svn.apache.org/viewvc/stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/MorphoFeatures.java?rev=1412121&r1=1412120&r2=1412121&view=diff
==============================================================================
--- stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/MorphoFeatures.java (original)
+++ stanbol/branches/stanbol-nlp-processing/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/morpho/MorphoFeatures.java Wed Nov 21 14:24:16 2012
@@ -37,8 +37,8 @@ public class MorphoFeatures {
private Object genderTags;
private Object numberTags;
private Object caseFeatureTags;
- private Object personTags;
- private Object definitnessTags;
+ private Object personValue;
+ private Object definitnessValue;
private Object verbFormTags;
private Object tenseTags;
@@ -68,15 +68,15 @@ public class MorphoFeatures {
return getValues(caseFeatureTags, CaseTag.class);
}
- public final void addDefinitness(DefinitnessTag definitness) {
- definitnessTags = addTo(definitnessTags,definitness,DefinitnessTag.class);
+ public final void addDefinitness(Definitness definitness) {
+ definitnessValue = addTo(definitnessValue,definitness,Definitness.class);
}
- public final DefinitnessTag getDefinitness(){
- return getValue(definitnessTags, DefinitnessTag.class);
+ public final Definitness getDefinitness(){
+ return getValue(definitnessValue, Definitness.class);
}
- public final List<DefinitnessTag> getDefinitnessList(){
- return getValues(definitnessTags, DefinitnessTag.class);
+ public final List<Definitness> getDefinitnessList(){
+ return getValues(definitnessValue, Definitness.class);
}
public final void addGender(GenderTag gender) {
@@ -103,16 +103,16 @@ public class MorphoFeatures {
return getValues(numberTags, NumberTag.class);
}
- public void addPerson(PersonTag person) {
- personTags = addTo(personTags,person,PersonTag.class);
+ public void addPerson(Person person) {
+ personValue = addTo(personValue,person,Person.class);
}
- public final PersonTag getPerson(){
- return getValue(personTags, PersonTag.class);
+ public final Person getPerson(){
+ return getValue(personValue, Person.class);
}
- public final List<PersonTag> getPersonList(){
- return getValues(personTags, PersonTag.class);
+ public final List<Person> getPersonList(){
+ return getValues(personValue, Person.class);
}
public void addPos(PosTag pos) {
@@ -153,7 +153,7 @@ public class MorphoFeatures {
@Override
public int hashCode() {
- return lemma.hashCode() + posTags.hashCode() + genderTags.hashCode() + personTags.hashCode() + caseFeatureTags.hashCode() + definitnessTags.hashCode() + verbFormTags.hashCode() + tenseTags.hashCode();
+ return lemma.hashCode() + posTags.hashCode() + genderTags.hashCode() + personValue.hashCode() + caseFeatureTags.hashCode() + definitnessValue.hashCode() + verbFormTags.hashCode() + tenseTags.hashCode();
}
@Override
public boolean equals(Object o) {
@@ -161,7 +161,7 @@ public class MorphoFeatures {
MorphoFeatures lt = (MorphoFeatures) o;
return ((genderTags != null && genderTags.equals(lt.genderTags)) || (genderTags == null && lt.genderTags == null)) && ((caseFeatureTags != null && caseFeatureTags.equals(lt.caseFeatureTags)) || (caseFeatureTags == null && lt.caseFeatureTags == null))
&& ((tenseTags != null && tenseTags.equals(lt.tenseTags)) || (tenseTags == null && lt.tenseTags == null)) && ((numberTags != null && numberTags.equals(lt.numberTags)) || (numberTags == null && lt.numberTags == null))
- && ((definitnessTags != null && definitnessTags.equals(lt.definitnessTags)) || (definitnessTags == null && lt.definitnessTags == null)) && ((personTags != null && personTags.equals(lt.personTags)) || (personTags == null && lt.personTags == null))
+ && ((definitnessValue != null && definitnessValue.equals(lt.definitnessValue)) || (definitnessValue == null && lt.definitnessValue == null)) && ((personValue != null && personValue.equals(lt.personValue)) || (personValue == null && lt.personValue == null))
&& ((verbFormTags != null && verbFormTags.equals(lt.verbFormTags)) || (verbFormTags == null && lt.verbFormTags == null));
} else {
return false;
@@ -180,11 +180,11 @@ public class MorphoFeatures {
if(numberTags != null){
sb.append("| ").append(numberTags);
}
- if(personTags != null){
- sb.append("| ").append(personTags);
+ if(personValue != null){
+ sb.append("| ").append(personValue);
}
- if(definitnessTags != null){
- sb.append("| ").append(definitnessTags);
+ if(definitnessValue != null){
+ sb.append("| ").append(definitnessValue);
}
if(caseFeatureTags != null){
sb.append("| ").append(caseFeatureTags);