You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by mb...@apache.org on 2008/05/02 20:27:25 UTC

svn commit: r652864 - in /incubator/uima/sandbox/trunk/OpenCalaisAnnotator: desc/OpenCalaisAnnotator.xml src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java

Author: mbaessler
Date: Fri May  2 11:27:25 2008
New Revision: 652864

URL: http://svn.apache.org/viewvc?rev=652864&view=rev
Log:
UIMA-1021

update annotator with more Calais meta data mappings

https://issues.apache.org/jira/browse/UIMA-1021

Modified:
    incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml
    incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java

Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml?rev=652864&r1=652863&r2=652864&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotator/desc/OpenCalaisAnnotator.xml Fri May  2 11:27:25 2008
@@ -96,6 +96,106 @@
         <typeDescription>
           <name>org.apache.uima.calais.Person</name>
           <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Anniversary</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.City</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Company</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Continent</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Country</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Currency</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.EmailAddress</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Facility</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.FaxNumber</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Holiday</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.IndustryTerm</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.NaturalDisaster</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.NaturalFeature</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Organization</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.PhoneNumber</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.ProviceOrState</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Region</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.Technology</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.URL</name>
+          <description/>
+          <supertypeName>org.apache.uima.calais.BaseType</supertypeName>
+        </typeDescription>
+        <typeDescription>
+          <name>org.apache.uima.calais.BaseType</name>
+          <description/>
           <supertypeName>uima.tcas.Annotation</supertypeName>
           <features>
 				<featureDescription>

Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java?rev=652864&r1=652863&r2=652864&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java Fri May  2 11:27:25 2008
@@ -62,9 +62,47 @@
 
   private SAXParser saxParser;
 
+  private Type anniversaryType;
+
+  private Type cityType;
+
+  private Type companyType;
+
+  private Type continentType;
+
+  private Type countryType;
+
+  private Type currencyType;
+
+  private Type emailAddressType;
+
+  private Type facilityType;
+
+  private Type faxNumberType;
+
+  private Type holidayType;
+
+  private Type industryTermType;
+
+  private Type naturalDisasterType;
+
+  private Type naturalFeatureType;
+
+  private Type organizationType;
+
   private Type personType;
 
-  private Feature personCalaisTypeFeat;
+  private Type phoneNumberType;
+
+  private Type provinceOrStateType;
+
+  private Type regionType;
+
+  private Type technologyType;
+
+  private Type urlType;
+
+  private Feature calaisTypeFeat;
 
   private String serviceParams;
 
@@ -72,6 +110,8 @@
 
   private URL calaisService;
 
+  private HashMap<String, Type> typeMapping;
+
   public void process(CAS aCas) throws AnalysisEngineProcessException {
 
     try {
@@ -91,6 +131,7 @@
       BufferedInputStream in = new BufferedInputStream(connection.getInputStream());
       Document feedDoc = docBuilder.parse(in);
       String RdfXmlContent = feedDoc.getDocumentElement().getTextContent();
+      // System.out.println(RdfXmlContent);
 
       // create new InputStream for the RDF XML content
       BufferedInputStream bufByteIn = new BufferedInputStream(new ByteArrayInputStream(
@@ -109,16 +150,18 @@
       Iterator<DescriptionElement> elementIt = elements.iterator();
       while (elementIt.hasNext()) {
         DescriptionElement element = elementIt.next();
-        if (element.getTypeURL().equals("http://s.opencalais.com/1/type/em/e/Person")) {
+        // if a mapping is available for the typeURL, create an annotation in the CAS
+        Type currentType = this.typeMapping.get(element.getTypeURL());
+        if (currentType != null) {
+          // mapping is available, create annotation
+          // get reference element that contains the annotation span
           DescriptionElement refElement = subjectMap.get(element.getAboutURL());
           int begin = refElement.getOffset() - offset.getOffset();
           int end = begin + refElement.getLength();
-          AnnotationFS annotFs = aCas.createAnnotation(this.personType, begin, end);
-          annotFs.setStringValue(this.personCalaisTypeFeat, element.getTypeURL().intern());
+          // create annotation
+          AnnotationFS annotFs = aCas.createAnnotation(currentType, begin, end);
+          annotFs.setStringValue(this.calaisTypeFeat, element.getTypeURL().intern());
           aCas.addFsToIndexes(annotFs);
-
-          // System.out.println("Found person: " + aCas.getDocumentText().substring(begin,end) + "
-          // (" + begin + "," + end + ")");
         }
       }
     } catch (IOException ex) {
@@ -200,7 +243,6 @@
     } catch (MalformedURLException ex) {
       throw new ResourceInitializationException(ex);
     }
-
   }
 
   /*
@@ -211,8 +253,54 @@
   public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
     super.typeSystemInit(typeSystem);
 
+    // get types and features
     this.personType = typeSystem.getType("org.apache.uima.calais.Person");
-    this.personCalaisTypeFeat = this.personType.getFeatureByBaseName("calaisType");
+    this.anniversaryType = typeSystem.getType("org.apache.uima.calais.Anniversary");
+    this.cityType = typeSystem.getType("org.apache.uima.calais.City");
+    this.companyType = typeSystem.getType("org.apache.uima.calais.Company");
+    this.continentType = typeSystem.getType("org.apache.uima.calais.Continent");
+    this.countryType = typeSystem.getType("org.apache.uima.calais.Country");
+    this.currencyType = typeSystem.getType("org.apache.uima.calais.Currency");
+    this.emailAddressType = typeSystem.getType("org.apache.uima.calais.EmailAddress");
+    this.facilityType = typeSystem.getType("org.apache.uima.calais.Facility");
+    this.faxNumberType = typeSystem.getType("org.apache.uima.calais.FaxNumber");
+    this.holidayType = typeSystem.getType("org.apache.uima.calais.Holiday");
+    this.industryTermType = typeSystem.getType("org.apache.uima.calais.IndustryTerm");
+    this.naturalDisasterType = typeSystem.getType("org.apache.uima.calais.NaturalDisaster");
+    this.naturalFeatureType = typeSystem.getType("org.apache.uima.calais.NaturalFeature");
+    this.organizationType = typeSystem.getType("org.apache.uima.calais.Organization");
+    this.phoneNumberType = typeSystem.getType("org.apache.uima.calais.PhoneNumber");
+    this.provinceOrStateType = typeSystem.getType("org.apache.uima.calais.ProviceOrState");
+    this.regionType = typeSystem.getType("org.apache.uima.calais.Region");
+    this.technologyType = typeSystem.getType("org.apache.uima.calais.Technology");
+    this.urlType = typeSystem.getType("org.apache.uima.calais.URL");
+    this.calaisTypeFeat = this.personType.getFeatureByBaseName("calaisType");
+
+    // create type mapping HashMap
+    this.typeMapping = new HashMap<String, Type>(20);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Person", this.personType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Anniversary", this.anniversaryType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/City", this.cityType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Company", this.companyType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Continent", this.continentType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Country", this.countryType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Currency", this.currencyType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/EmailAddress", this.emailAddressType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Facility", this.facilityType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/FaxNumber", this.faxNumberType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Holiday", this.holidayType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/IndustryTerm", this.industryTermType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/NaturalDisaster",
+            this.naturalDisasterType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/NaturalFeature",
+            this.naturalFeatureType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Organization", this.organizationType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/PhoneNumber", this.phoneNumberType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/ProvinceOrState",
+            this.provinceOrStateType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Region", this.regionType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/Technology", this.technologyType);
+    this.typeMapping.put("http://s.opencalais.com/1/type/em/e/URL", this.urlType);
 
   }