You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by mb...@apache.org on 2008/05/02 20:27:25 UTC
svn commit: r652864 - in /incubator/uima/sandbox/trunk/OpenCalaisAnnotator:
desc/OpenCalaisAnnotator.xml
src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java
Author: mbaessler
Date: Fri May 2 11:27:25 2008
New Revision: 652864
URL: http://svn.apache.org/viewvc?rev=652864&view=rev
Log:
UIMA-1021
update annotator with more Calais metadata mappings
https://issues.apache.org/jira/browse/UIMA-1021
Modified:
incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml
incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java
Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml?rev=652864&r1=652863&r2=652864&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml Fri May 2 11:27:25 2008
@@ -96,6 +96,106 @@
<typeDescription>
<name>org.apache.uima.calais.Person</name>
<description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Anniversary</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.City</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Company</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Continent</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Country</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Currency</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.EmailAddress</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Facility</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.FaxNumber</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Holiday</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.IndustryTerm</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.NaturalDisaster</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.NaturalFeature</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Organization</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.PhoneNumber</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.ProviceOrState</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Region</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.Technology</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.URL</name>
+ <description/>
+ <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+ </typeDescription>
+ <typeDescription>
+ <name>org.apache.uima.calais.BaseType</name>
+ <description/>
<supertypeName>uima.tcas.Annotation</supertypeName>
<features>
<featureDescription>
Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java?rev=652864&r1=652863&r2=652864&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java Fri May 2 11:27:25 2008
@@ -62,9 +62,47 @@
private SAXParser saxParser;
+ private Type anniversaryType;
+
+ private Type cityType;
+
+ private Type companyType;
+
+ private Type continentType;
+
+ private Type countryType;
+
+ private Type currencyType;
+
+ private Type emailAddressType;
+
+ private Type facilityType;
+
+ private Type faxNumberType;
+
+ private Type holidayType;
+
+ private Type industryTermType;
+
+ private Type naturalDisasterType;
+
+ private Type naturalFeatureType;
+
+ private Type organizationType;
+
private Type personType;
- private Feature personCalaisTypeFeat;
+ private Type phoneNumberType;
+
+ private Type provinceOrStateType;
+
+ private Type regionType;
+
+ private Type technologyType;
+
+ private Type urlType;
+
+ private Feature calaisTypeFeat;
private String serviceParams;
@@ -72,6 +110,8 @@
private URL calaisService;
+ private HashMap<String, Type> typeMapping;
+
public void process(CAS aCas) throws AnalysisEngineProcessException {
try {
@@ -91,6 +131,7 @@
BufferedInputStream in = new BufferedInputStream(connection.getInputStream());
Document feedDoc = docBuilder.parse(in);
String RdfXmlContent = feedDoc.getDocumentElement().getTextContent();
+ // System.out.println(RdfXmlContent);
// create new InputStream for the RDF XML content
BufferedInputStream bufByteIn = new BufferedInputStream(new ByteArrayInputStream(
@@ -109,16 +150,18 @@
Iterator<DescriptionElement> elementIt = elements.iterator();
while (elementIt.hasNext()) {
DescriptionElement element = elementIt.next();
- if (element.getTypeURL().equals("http://s.opencalais.com/1/type/em/e/Person")) {
+ // if a mapping is available for the typeURL, create an annotation in the CAS
+ Type currentType = this.typeMapping.get(element.getTypeURL());
+ if (currentType != null) {
+ // mapping is available, create annotation
+ // get reference element that contains the annotation span
DescriptionElement refElement = subjectMap.get(element.getAboutURL());
int begin = refElement.getOffset() - offset.getOffset();
int end = begin + refElement.getLength();
- AnnotationFS annotFs = aCas.createAnnotation(this.personType, begin, end);
- annotFs.setStringValue(this.personCalaisTypeFeat, element.getTypeURL().intern());
+ // create annotation
+ AnnotationFS annotFs = aCas.createAnnotation(currentType, begin, end);
+ annotFs.setStringValue(this.calaisTypeFeat, element.getTypeURL().intern());
aCas.addFsToIndexes(annotFs);
-
- // System.out.println("Found person: " + aCas.getDocumentText().substring(begin,end) + "
- // (" + begin + "," + end + ")");
}
}
} catch (IOException ex) {
@@ -200,7 +243,6 @@
} catch (MalformedURLException ex) {
throw new ResourceInitializationException(ex);
}
-
}
/*
@@ -211,8 +253,54 @@
public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
super.typeSystemInit(typeSystem);
+ // get types and features
this.personType = typeSystem.getType("org.apache.uima.calais.Person");
- this.personCalaisTypeFeat = this.personType.getFeatureByBaseName("calaisType");
+ this.anniversaryType = typeSystem.getType("org.apache.uima.calais.Anniversary");
+ this.cityType = typeSystem.getType("org.apache.uima.calais.City");
+ this.companyType = typeSystem.getType("org.apache.uima.calais.Company");
+ this.continentType = typeSystem.getType("org.apache.uima.calais.Continent");
+ this.countryType = typeSystem.getType("org.apache.uima.calais.Country");
+ this.currencyType = typeSystem.getType("org.apache.uima.calais.Currency");
+ this.emailAddressType = typeSystem.getType("org.apache.uima.calais.EmailAddress");
+ this.facilityType = typeSystem.getType("org.apache.uima.calais.Facility");
+ this.faxNumberType = typeSystem.getType("org.apache.uima.calais.FaxNumber");
+ this.holidayType = typeSystem.getType("org.apache.uima.calais.Holiday");
+ this.industryTermType = typeSystem.getType("org.apache.uima.calais.IndustryTerm");
+ this.naturalDisasterType = typeSystem.getType("org.apache.uima.calais.NaturalDisaster");
+ this.naturalFeatureType = typeSystem.getType("org.apache.uima.calais.NaturalFeature");
+ this.organizationType = typeSystem.getType("org.apache.uima.calais.Organization");
+ this.phoneNumberType = typeSystem.getType("org.apache.uima.calais.PhoneNumber");
+ this.provinceOrStateType = typeSystem.getType("org.apache.uima.calais.ProviceOrState");
+ this.regionType = typeSystem.getType("org.apache.uima.calais.Region");
+ this.technologyType = typeSystem.getType("org.apache.uima.calais.Technology");
+ this.urlType = typeSystem.getType("org.apache.uima.calais.URL");
+ this.calaisTypeFeat = this.personType.getFeatureByBaseName("calaisType");
+
+ // create type mapping HashMap
+ this.typeMapping = new HashMap<String, Type>(20);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Person", this.personType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Anniversary", this.anniversaryType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/City", this.cityType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Company", this.companyType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Continent", this.continentType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Country", this.countryType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Currency", this.currencyType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/EmailAddress", this.emailAddressType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Facility", this.facilityType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/FaxNumber", this.faxNumberType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Holiday", this.holidayType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/IndustryTerm", this.industryTermType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/NaturalDisaster",
+ this.naturalDisasterType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/NaturalFeature",
+ this.naturalFeatureType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Organization", this.organizationType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/PhoneNumber", this.phoneNumberType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/ProvinceOrState",
+ this.provinceOrStateType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Region", this.regionType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Technology", this.technologyType);
+ this.typeMapping.put("http://s.opencalais.com/1/type/em/e/URL", this.urlType);
}