You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2008/05/27 18:03:41 UTC
svn commit: r660574 - in
/incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy:
CalaisTestCollectionReader.groovy CalaisTypes.groovy RdfProcessor.groovy
Author: schor
Date: Tue May 27 09:03:36 2008
New Revision: 660574
URL: http://svn.apache.org/viewvc?rev=660574&view=rev
Log:
no Jira - set eol-style to native for groovy files
Modified:
incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTestCollectionReader.groovy (props changed)
incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy (contents, props changed)
incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy (contents, props changed)
Propchange: incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTestCollectionReader.groovy
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy?rev=660574&r1=660573&r2=660574&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy Tue May 27 09:03:36 2008
@@ -1,448 +1,448 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.uima.annotator.calais_groovy
-
-
-
-
-public class CalaisTypes {
-
- static String INSTANCE_INFO = 'http://s.opencalais.com/1/type/sys/InstanceInfo'
-
-
- static getCalaisTypeNameEntity (typeName) {
- return "http://s.opencalais.com/1/type/em/e/$typeName"
- }
-
- static getCalaisTypeNameRelation (typeName) {
- return "http://s.opencalais.com/1/type/em/r/$typeName"
- }
-
- // calais entities become annotations of a same name
- // calais relations become annotations of same name, with refs to other entity annotations
-
- static calaisTypes = new NodeBuilder().types {
- entity {
- Anniversary {attrs { name() } }
- City {attrs { name() } }
- Company {attrs { name() } }
- Continent {attrs { name() } }
- Country {attrs { name() } }
- Currency {attrs { name() } } // is currency denomination, normalized, eg. USD (for $)
- EmailAddress {attrs { name() } }
- EntertainmentAwardEvent {attrs { name() } }
- Facility {attrs { name() } }
- FaxNumber {attrs { name() } }
- Holiday {attrs { name() } }
- IndustryTerm{attrs { name() } }
- MedicalCondition {attrs { name() } }
- Movie {attrs { name() } }
- MusicAlbum {attrs { name() } }
- MusicGroup {attrs { name() } }
- NaturalDisaster {attrs { name() } } // some normalization
- NaturalFeature {attrs { name() } }
- Organization {attrs { name() } }
- Person {attrs { name()
- persontype() } } // values N/A sports entertainment political etc.
- PhoneNumber {attrs { name() } }
- Product {attrs { name() } } // drug products
- ProvinceOrState {attrs { name() } }
- PublishedMedium {attrs { name() } }
- Region {attrs { name() } } // eg Far East
- SportsEvent {attrs { name() } }
- SportsGame {attrs { name() } }
- Technology {attrs { name() } }
- TVShow {attrs { name() } }
- URL {attrs { name() } }
- }
-
- factOrEvent {
- Acquisition {
- attrs {company_acquirer {range 'Company'} // values are refs to company
- company_beingacquired {range 'Company'} // values are refs to company
- status {
- allowedValues([
- 'announced',
- 'planned',
- 'cancelled',
- 'postponed',
- 'rumored',
- 'known']) } } }
- Alliance {
- attrs {company {range 'Company'; multivalued()}
- status {
- allowedValues([
- 'announced',
- 'planned',
- 'cancelled',
- 'postponed',
- 'rumored',
- 'known']) } } }
-
- AnalystEarningsEstimate {
- attrs {company_source {range 'Company'}
- person_source {range 'Person'}
- company_rated {range 'Company'}
- quarter {allowedValues([
- 'Q1', 'Q2', 'Q3', 'Q4',
- 'H1', 'NINE_MONTHS', 'FY']) }
- year()
- }
- }
-
- AnalystRecommendation {
- attrs {company_source {range 'Company'}
- person_source {range 'Person'}
- company_rated {range 'Company'}
- trend {allowedValues([
- 'upgraded', 'downgraded', 'reiterated',
- 'initiated'])}
- rank_new() // Strong Buy, Hold, etc.
- rank_old()
- }
- }
- Bankruptcy {
- attrs {
- company {range 'Company'}
- bankruptcystatus() // considered, expected to emerge
- date()
- }
- }
-
- BusinessRelation {
- attrs {
- company {range 'Company'; multivalued()}
- status {
- allowedValues([
- 'announced',
- 'planned',
- 'cancelled',
- 'postponed',
- 'rumored',
- 'known' ])
- }
- }
- }
- Buybacks {
- attrs {
- company {range 'Company'}
- date()
- }
- }
- CompanyAffiliates {
- attrs {
- company_affiliate {range 'Company'}
- company_parent {range 'Company'}
- relation() // e.g. subsidiary, division, child, parent
- }
- }
- CompanyCustomer {
- attrs { // only one of Company_customer/Organization_Customer is required
- company_provider {range 'Company'}
- company_customer {range 'Company'}
- organization_customer()
- }
- }
- CompanyEarningsAnnouncement {
- attrs {
- company {range 'Company'}
- quarter()
- year()
- }
- }
- CompanyEarningsGuidance {
- attrs {
- company {range 'Company'}
- quarter()
- year()
- trend()
- }
- }
- CompanyInvestment {
- attrs {
- company {range 'Company'}
- company_investor {range 'Company'}
- status()
- }
- }
- CompanyLegalIssues {
- attrs {
- company_sued {range 'Company'}
- sueddescription()
- company_plaintiff {range 'Company'}
- person_plaintiff {range 'Person'}
- lawsuitclass()
- date()
- }
- }
- CompanyLocation {
- attrs {
- company {range 'Company'}
- city {range 'City'}
- provinceorstate {range 'ProvinceOrState'}
- country {range 'Country'}
- }
- }
- CompanyMeeting {
- attrs {
- company {range 'Company'}
- companymeetingtype {
- allowedValues([
- 'AGM', 'EGM', "Shareholders' Meeting"])
- }
- country {range 'Country'}
- city {range 'City'}
- provinceorstate {range 'ProvinceOrState'}
- status()
- date()
- meetingsite()
- }
- }
- CompanyReorganization {
- attrs {
- company {range 'Company'}
- status()
- }
- }
- CompanyTechnology {
- attrs {
- company {range 'Company'}
- technology()
- }
- }
- ConferenceCall {
- attrs {
- company {range 'Company'}
- ccalltype()
- quarter()
- status {allowedValues([
- 'announced', 'rumored', 'planned',
- 'cancelled', 'postponed', 'known']) }
- date()
- }
- }
- CreditRating {
- attrs {
- company_source {range 'Company'}
- company_rated {range 'Company'}
- organization_rated {range 'Organization'}
- trend {allowedValues([
- 'affirms', 'assigns', 'changes', 'cuts', 'expects to change',
- 'puts', 'raises', 'rates', 'removes', 'says', 'withdraws']) }
- rank_new()
- rank_old()
- }
- }
- FamilyRelation {
- attrs {
- person {range 'Person'}
- person_relative () // not a range of person, just a string
- familyrelationtype()
- }
- }
- IPO {
- attrs {
- company {range 'Company'}
- status {allowedValues([
- 'planned', 'announced', 'delayed', 'known']) }
- date()
- }
- }
- JointVenture {
- attrs {
- company {range 'Company'; multivalued()}
- company_newname()
- status {allowedValues([
- 'announced', 'planned', 'cancelled', 'postponed',
- 'known'])
- }
- }
- }
- ManagementChange {
- attrs {
- company {range 'Company'}
- organization {range 'Organization'}
- person {range 'Person'}
- position()
- action {allowedValues([
- 'enters', 'leaves', 'retired'])
- }
- }
- }
- Merger {
- attrs {
- company {range 'Company'; multivalued()}
- status {allowedValues([
- 'announced', 'planned', 'cancelled',
- 'postponed', 'rumored', 'known']) }
- }
- }
- PersonEducation {
- attrs {
- person {range 'Person'}
- certification()
- degree()
- schoolororganization()
- }
- }
- PersonPolitical {
- attrs {
- person {range 'Person'}
- position()
- country {range 'Country'}
- provinceorstate {range 'ProvinceOrState'}
- city {range 'City'}
- }
- }
- PersonPoliticalPast {
- attrs {
- person {range 'Person'}
- position()
- country {range 'Country'}
- provinceorstate {range 'ProvinceOrState'}
- city {range 'City'}
- }
- }
- PersonProfessional {
- attrs {
- person {range 'Person'}
- position()
- company {range 'Company'}
- organization {range 'Organization'}
- }
- }
- PersonProfessionalPast {
- attrs {
- person {range 'Person'}
- position()
- company {range 'Company'}
- organization {range 'Organization'}
- }
- }
- Quotation {
- attrs {
- person {range 'Person'}
- quote()
- }
- }
- StockSplit {
- attrs {
- company {range 'Company'}
- }
- }
- }
- }
-
- static final String TYPE_PREFIX = 'org.apache.uima.calaisType.'
- /**
- * Run this to print out a UIMA type system description for the above calais types
- *
- * Types:
- * each entity has a type
- * each relation has a type
- * an instance of an entity or relation has begin/end plus a ref to an entity or a type
- */
- static void main(args) {
- def writer = new StringWriter()
- def utypes = new groovy.xml.MarkupBuilder(writer)
- utypes.types {
- typeDescription {
- name "${TYPE_PREFIX}Base"
- supertypeName 'uima.cas.TOP'
- }
- typeDescription {
- name "${TYPE_PREFIX}Entity"
- supertypeName "${TYPE_PREFIX}Base"
- }
- typeDescription {
- name "${TYPE_PREFIX}Relation"
- supertypeName "${TYPE_PREFIX}Base"
- }
- typeDescription {
- name "${TYPE_PREFIX}Instance"
- supertypeName "uima.tcas.Annotation"
- }
-
- typeDescription {
- name "${TYPE_PREFIX}EntityInstance"
- supertypeName "${TYPE_PREFIX}Instance"
- features {
- featureDescription {
- name 'entity'
- rangeTypeName "${TYPE_PREFIX}Entity"
- }
- }
- }
-
- typeDescription {
- name "${TYPE_PREFIX}RelationInstance"
- supertypeName "${TYPE_PREFIX}Instance"
- features {
- featureDescription {
- name 'relation'
- rangeTypeName "${TYPE_PREFIX}Relation"
- }
- }
- }
-
- calaisTypes.entity[0].each {
- def entityName = it.name()
- typeDescription {
- name "${TYPE_PREFIX}entity.${entityName}"
- supertypeName "${TYPE_PREFIX}Entity"
- features {
- featureDescription {
- name 'canonicalForm'
- rangeTypeName 'uima.cas.String'
- }
- }
- }
- }
-
- calaisTypes.factOrEvent[0].each {
- def relation = it
- typeDescription {
- name "${TYPE_PREFIX}relation.${relation.name()}"
- supertypeName "${TYPE_PREFIX}Relation"
- features {
- relation.attrs[0].each {
- def feat = it
- featureDescription {
- name feat.name()
- if (feat.allowedValues[0]) {
- rangeTypeName 'uima.cas.String'
- } else if (feat.range[0]) {
- if (feat.multivalued[0]) {
- rangeTypeName 'uima.cas.FSArray'
- elementType "${TYPE_PREFIX}entity.${feat.range[0].text()}"
- } else {
- rangeTypeName "${TYPE_PREFIX}entity.${feat.range[0].text()}"
- }
- } else {
- rangeTypeName 'uima.cas.String'
- }
- }
- }
- }
- }
- }
- }
- println writer.toString()
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.annotator.calais_groovy
+
+
+
+
+public class CalaisTypes {
+
+ static String INSTANCE_INFO = 'http://s.opencalais.com/1/type/sys/InstanceInfo'
+
+
+ static getCalaisTypeNameEntity (typeName) {
+ return "http://s.opencalais.com/1/type/em/e/$typeName"
+ }
+
+ static getCalaisTypeNameRelation (typeName) {
+ return "http://s.opencalais.com/1/type/em/r/$typeName"
+ }
+
+ // calais entities become annotations of a same name
+ // calais relations become annotations of same name, with refs to other entity annotations
+
+ static calaisTypes = new NodeBuilder().types {
+ entity {
+ Anniversary {attrs { name() } }
+ City {attrs { name() } }
+ Company {attrs { name() } }
+ Continent {attrs { name() } }
+ Country {attrs { name() } }
+ Currency {attrs { name() } } // is currency denomination, normalized, eg. USD (for $)
+ EmailAddress {attrs { name() } }
+ EntertainmentAwardEvent {attrs { name() } }
+ Facility {attrs { name() } }
+ FaxNumber {attrs { name() } }
+ Holiday {attrs { name() } }
+ IndustryTerm{attrs { name() } }
+ MedicalCondition {attrs { name() } }
+ Movie {attrs { name() } }
+ MusicAlbum {attrs { name() } }
+ MusicGroup {attrs { name() } }
+ NaturalDisaster {attrs { name() } } // some normalization
+ NaturalFeature {attrs { name() } }
+ Organization {attrs { name() } }
+ Person {attrs { name()
+ persontype() } } // values N/A sports entertainment political etc.
+ PhoneNumber {attrs { name() } }
+ Product {attrs { name() } } // drug products
+ ProvinceOrState {attrs { name() } }
+ PublishedMedium {attrs { name() } }
+ Region {attrs { name() } } // eg Far East
+ SportsEvent {attrs { name() } }
+ SportsGame {attrs { name() } }
+ Technology {attrs { name() } }
+ TVShow {attrs { name() } }
+ URL {attrs { name() } }
+ }
+
+ factOrEvent {
+ Acquisition {
+ attrs {company_acquirer {range 'Company'} // values are refs to company
+ company_beingacquired {range 'Company'} // values are refs to company
+ status {
+ allowedValues([
+ 'announced',
+ 'planned',
+ 'cancelled',
+ 'postponed',
+ 'rumored',
+ 'known']) } } }
+ Alliance {
+ attrs {company {range 'Company'; multivalued()}
+ status {
+ allowedValues([
+ 'announced',
+ 'planned',
+ 'cancelled',
+ 'postponed',
+ 'rumored',
+ 'known']) } } }
+
+ AnalystEarningsEstimate {
+ attrs {company_source {range 'Company'}
+ person_source {range 'Person'}
+ company_rated {range 'Company'}
+ quarter {allowedValues([
+ 'Q1', 'Q2', 'Q3', 'Q4',
+ 'H1', 'NINE_MONTHS', 'FY']) }
+ year()
+ }
+ }
+
+ AnalystRecommendation {
+ attrs {company_source {range 'Company'}
+ person_source {range 'Person'}
+ company_rated {range 'Company'}
+ trend {allowedValues([
+ 'upgraded', 'downgraded', 'reiterated',
+ 'initiated'])}
+ rank_new() // Strong Buy, Hold, etc.
+ rank_old()
+ }
+ }
+ Bankruptcy {
+ attrs {
+ company {range 'Company'}
+ bankruptcystatus() // considered, expected to emerge
+ date()
+ }
+ }
+
+ BusinessRelation {
+ attrs {
+ company {range 'Company'; multivalued()}
+ status {
+ allowedValues([
+ 'announced',
+ 'planned',
+ 'cancelled',
+ 'postponed',
+ 'rumored',
+ 'known' ])
+ }
+ }
+ }
+ Buybacks {
+ attrs {
+ company {range 'Company'}
+ date()
+ }
+ }
+ CompanyAffiliates {
+ attrs {
+ company_affiliate {range 'Company'}
+ company_parent {range 'Company'}
+ relation() // e.g. subsidiary, division, child, parent
+ }
+ }
+ CompanyCustomer {
+ attrs { // only one of Company_customer/Organization_Customer is required
+ company_provider {range 'Company'}
+ company_customer {range 'Company'}
+ organization_customer()
+ }
+ }
+ CompanyEarningsAnnouncement {
+ attrs {
+ company {range 'Company'}
+ quarter()
+ year()
+ }
+ }
+ CompanyEarningsGuidance {
+ attrs {
+ company {range 'Company'}
+ quarter()
+ year()
+ trend()
+ }
+ }
+ CompanyInvestment {
+ attrs {
+ company {range 'Company'}
+ company_investor {range 'Company'}
+ status()
+ }
+ }
+ CompanyLegalIssues {
+ attrs {
+ company_sued {range 'Company'}
+ sueddescription()
+ company_plaintiff {range 'Company'}
+ person_plaintiff {range 'Person'}
+ lawsuitclass()
+ date()
+ }
+ }
+ CompanyLocation {
+ attrs {
+ company {range 'Company'}
+ city {range 'City'}
+ provinceorstate {range 'ProvinceOrState'}
+ country {range 'Country'}
+ }
+ }
+ CompanyMeeting {
+ attrs {
+ company {range 'Company'}
+ companymeetingtype {
+ allowedValues([
+ 'AGM', 'EGM', "Shareholders' Meeting"])
+ }
+ country {range 'Country'}
+ city {range 'City'}
+ provinceorstate {range 'ProvinceOrState'}
+ status()
+ date()
+ meetingsite()
+ }
+ }
+ CompanyReorganization {
+ attrs {
+ company {range 'Company'}
+ status()
+ }
+ }
+ CompanyTechnology {
+ attrs {
+ company {range 'Company'}
+ technology()
+ }
+ }
+ ConferenceCall {
+ attrs {
+ company {range 'Company'}
+ ccalltype()
+ quarter()
+ status {allowedValues([
+ 'announced', 'rumored', 'planned',
+ 'cancelled', 'postponed', 'known']) }
+ date()
+ }
+ }
+ CreditRating {
+ attrs {
+ company_source {range 'Company'}
+ company_rated {range 'Company'}
+ organization_rated {range 'Organization'}
+ trend {allowedValues([
+ 'affirms', 'assigns', 'changes', 'cuts', 'expects to change',
+ 'puts', 'raises', 'rates', 'removes', 'says', 'withdraws']) }
+ rank_new()
+ rank_old()
+ }
+ }
+ FamilyRelation {
+ attrs {
+ person {range 'Person'}
+ person_relative () // not a range of person, just a string
+ familyrelationtype()
+ }
+ }
+ IPO {
+ attrs {
+ company {range 'Company'}
+ status {allowedValues([
+ 'planned', 'announced', 'delayed', 'known']) }
+ date()
+ }
+ }
+ JointVenture {
+ attrs {
+ company {range 'Company'; multivalued()}
+ company_newname()
+ status {allowedValues([
+ 'announced', 'planned', 'cancelled', 'postponed',
+ 'known'])
+ }
+ }
+ }
+ ManagementChange {
+ attrs {
+ company {range 'Company'}
+ organization {range 'Organization'}
+ person {range 'Person'}
+ position()
+ action {allowedValues([
+ 'enters', 'leaves', 'retired'])
+ }
+ }
+ }
+ Merger {
+ attrs {
+ company {range 'Company'; multivalued()}
+ status {allowedValues([
+ 'announced', 'planned', 'cancelled',
+ 'postponed', 'rumored', 'known']) }
+ }
+ }
+ PersonEducation {
+ attrs {
+ person {range 'Person'}
+ certification()
+ degree()
+ schoolororganization()
+ }
+ }
+ PersonPolitical {
+ attrs {
+ person {range 'Person'}
+ position()
+ country {range 'Country'}
+ provinceorstate {range 'ProvinceOrState'}
+ city {range 'City'}
+ }
+ }
+ PersonPoliticalPast {
+ attrs {
+ person {range 'Person'}
+ position()
+ country {range 'Country'}
+ provinceorstate {range 'ProvinceOrState'}
+ city {range 'City'}
+ }
+ }
+ PersonProfessional {
+ attrs {
+ person {range 'Person'}
+ position()
+ company {range 'Company'}
+ organization {range 'Organization'}
+ }
+ }
+ PersonProfessionalPast {
+ attrs {
+ person {range 'Person'}
+ position()
+ company {range 'Company'}
+ organization {range 'Organization'}
+ }
+ }
+ Quotation {
+ attrs {
+ person {range 'Person'}
+ quote()
+ }
+ }
+ StockSplit {
+ attrs {
+ company {range 'Company'}
+ }
+ }
+ }
+ }
+
+ static final String TYPE_PREFIX = 'org.apache.uima.calaisType.'
+ /**
+ * Run this to print out a UIMA type system description for the above calais types
+ *
+ * Types:
+ * each entity has a type
+ * each relation has a type
+ * an instance of an entity or relation has begin/end plus a ref to an entity or a type
+ */
+ static void main(args) {
+ def writer = new StringWriter()
+ def utypes = new groovy.xml.MarkupBuilder(writer)
+ utypes.types {
+ typeDescription {
+ name "${TYPE_PREFIX}Base"
+ supertypeName 'uima.cas.TOP'
+ }
+ typeDescription {
+ name "${TYPE_PREFIX}Entity"
+ supertypeName "${TYPE_PREFIX}Base"
+ }
+ typeDescription {
+ name "${TYPE_PREFIX}Relation"
+ supertypeName "${TYPE_PREFIX}Base"
+ }
+ typeDescription {
+ name "${TYPE_PREFIX}Instance"
+ supertypeName "uima.tcas.Annotation"
+ }
+
+ typeDescription {
+ name "${TYPE_PREFIX}EntityInstance"
+ supertypeName "${TYPE_PREFIX}Instance"
+ features {
+ featureDescription {
+ name 'entity'
+ rangeTypeName "${TYPE_PREFIX}Entity"
+ }
+ }
+ }
+
+ typeDescription {
+ name "${TYPE_PREFIX}RelationInstance"
+ supertypeName "${TYPE_PREFIX}Instance"
+ features {
+ featureDescription {
+ name 'relation'
+ rangeTypeName "${TYPE_PREFIX}Relation"
+ }
+ }
+ }
+
+ calaisTypes.entity[0].each {
+ def entityName = it.name()
+ typeDescription {
+ name "${TYPE_PREFIX}entity.${entityName}"
+ supertypeName "${TYPE_PREFIX}Entity"
+ features {
+ featureDescription {
+ name 'canonicalForm'
+ rangeTypeName 'uima.cas.String'
+ }
+ }
+ }
+ }
+
+ calaisTypes.factOrEvent[0].each {
+ def relation = it
+ typeDescription {
+ name "${TYPE_PREFIX}relation.${relation.name()}"
+ supertypeName "${TYPE_PREFIX}Relation"
+ features {
+ relation.attrs[0].each {
+ def feat = it
+ featureDescription {
+ name feat.name()
+ if (feat.allowedValues[0]) {
+ rangeTypeName 'uima.cas.String'
+ } else if (feat.range[0]) {
+ if (feat.multivalued[0]) {
+ rangeTypeName 'uima.cas.FSArray'
+ elementType "${TYPE_PREFIX}entity.${feat.range[0].text()}"
+ } else {
+ rangeTypeName "${TYPE_PREFIX}entity.${feat.range[0].text()}"
+ }
+ } else {
+ rangeTypeName 'uima.cas.String'
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ println writer.toString()
+ }
+}
Propchange: incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/CalaisTypes.groovy
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy?rev=660574&r1=660573&r2=660574&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy (original)
+++ incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy Tue May 27 09:03:36 2008
@@ -1,187 +1,187 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.uima.annotator.calais_groovy
-
-import org.apache.uima.jcas.cas.FSArray
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase
-import org.apache.uima.jcas.JCas
-
-
-public class RdfProcessor extends JCasAnnotator_ImplBase{
-
- static debugPrint = 0
-
- def descriptionMap // map, key = url-like ids, value = description node
- def entityMap
- def relationMap
- def instances
- static final multiCompanyRelations = ['Alliance', 'BusinessRelation', 'JointVenture', 'Merger']
-
-
- /* (non-Javadoc)
- * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
- */
- public void process(JCas jcas){
- def rdfindex = jcas.getIndexRepository().getIndex("org.apache.uima.annotator.calais.Rdf")
- rdfindex.each { processRdf(it, jcas) } // should only be one
- }
-
- def processRdf(rdfTextInstance, jcas) {
- // 4 passes
- // pass 1 - create url to node map
- // pass 2 - create entities
- // pass 3 - create relations (point to entities)
- // pass 4 - create instances of entities and relations -
- // ref entities and relations, and have begin/end refs to text
-
- // pass 2, 3, and 4 blended and done in 1 iteration.
- def rdf = new XmlSlurper().parseText(rdfTextInstance.rdfText)
- // pass 1
-
- descriptionMap = [ : ]
- rdf.Description.each {descriptionMap.put(it.@about.toString(), it)}
-
- if (debugPrint >= 3) {
- descriptionMap.each {k, v -> println "key = $k, value=$v"}
- }
- // pass 2, 3 and 4 blended
- entityMap = [ : ] // key = url for entity, value = featureStructure for it
- instances = []
-
- relationMap = [ : ]
-
- rdf.Description.each {
- def typeurl = it.type[0].@resource
- if (isEntity(typeurl)) {
- getOrMakeEntity(it.@about.toString(), jcas)
- } else if (isRelation(typeurl)) {
- getOrMakeRelation(it.@about.toString(), jcas)
- } else if (isInstance(typeurl)) {
- def entityOrRelation = descriptionMap.get(it.subject[0].@resource.toString())
- def is_entity = isEntity(entityOrRelation.type[0].@resource)
- def kind = is_entity ? 'Entity' : 'Relation'
- def instance = newJCasInstance(jcas, "${kind}Instance")
- instances.add(instance)
- if (is_entity) {
- instance.entity = getOrMakeEntity(entityOrRelation.@about.toString(), jcas)
- } else {
- instance.relation = getOrMakeRelation(entityOrRelation.@about.toString(), jcas)
- }
- instance.begin = Integer.valueOf(it.offset[0].text())
- instance.end = Integer.valueOf(it.offset[0].text()) + Integer.valueOf(it.length[0].text())
- instance.addToIndexes()
- }
- }
-
-
- if (debugPrint >= 2) {
- entityMap.each{k, v -> println "entity key = $k, value=$v"} // test-debug
- }
-
- if (debugPrint >= 2) {
- relationMap.each {k, v -> println "relation: k: $k, v: $v"} // debug test
- }
-
- if (debugPrint >= 1) {
- instances.each {println "instance $it"}
- }
-
- descriptionMap = entityMap = relationMap = instances = null
-
- }
-
- def lastPart(thing) {
- def s = thing.toString()
- s.substring(s.lastIndexOf('/') + 1)
- }
-
- def isRelation(url) {
-// println " is relation: ${url.toString()}"
- return url?.toString()?.startsWith('http://s.opencalais.com/1/type/em/r/')
- }
-
- def isEntity(url) {
-// println " is relation: ${url.toString()}"
- return url?.toString()?.startsWith('http://s.opencalais.com/1/type/em/e/')
- }
-
- def isInstance(url) {
-// println " is relation: ${url.toString()}"
- return (url?.toString() == 'http://s.opencalais.com/1/type/sys/InstanceInfo')
-}
-
- def newJCasInstance(jcas, type) {
- def clasz = Class.forName("org.apache.uima.calaisType.$type".toString(), true, jcas.getCas().JCasClassLoader)
- def constructor = clasz.getDeclaredConstructor(JCas.class);
- return constructor.newInstance(jcas)
- // mock as a map
-// return args.clone()
- }
-
- def getOrMakeEntity(key, jcas) {
- def instance = entityMap.get(key)
- if (instance) {
- return instance
- }
- def description = descriptionMap[key]
- def typeurl = description.type[0].@resource
- instance = newJCasInstance(jcas, "entity.${lastPart(typeurl)}")
- entityMap.put(key, instance)
- instance.canonicalForm = description.name[0].text()
- instance.addToIndexes()
- return instance
- }
-
- def getOrMakeRelation(key, jcas) {
- def instance = relationMap.get(key)
- if (instance) {
- return instance
- }
- def description = descriptionMap[key]
- def typeurl = description.type[0].@resource
- def relationName = lastPart(typeurl)
- instance = newJCasInstance(jcas, "relation.$relationName")
- relationMap.put(description.@about.toString(), instance)
-
- def multiCompany = multiCompanyRelations.contains(relationName)
- if (multiCompany) {
- // special handling - make an fs array of all companies
- def numberOfCompanies = description.company.size()
- instance.company = new FSArray(jcas, numberOfCompanies)
- description.company.eachWithIndex {obj, i ->
- instance.setCompany(i, entityMap.get(obj.@resource.toString()))
- }
- }
- description.children().each {
- def featureName = it.name()
- if (featureName == 'type' ||
- (multiCompany && featureName == 'company')) {
- return
- }
- def resourceurl = it.@resource.toString()
- if (resourceurl) {
- def value = getOrMakeEntity(resourceurl, jcas)
- instance[featureName] = value
- } else {
- instance[featureName] = it.text()
- }
- }
- instance.addToIndexes()
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.annotator.calais_groovy
+
+import org.apache.uima.jcas.cas.FSArray
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase
+import org.apache.uima.jcas.JCas
+
+
+public class RdfProcessor extends JCasAnnotator_ImplBase{
+
+ static debugPrint = 0
+
+ def descriptionMap // map, key = url-like ids, value = description node
+ def entityMap
+ def relationMap
+ def instances
+ static final multiCompanyRelations = ['Alliance', 'BusinessRelation', 'JointVenture', 'Merger']
+
+
+ /* (non-Javadoc)
+ * @see org.apache.uima.analysis_component.JCasAnnotator_ImplBase#process(org.apache.uima.jcas.JCas)
+ */
+ public void process(JCas jcas){
+ def rdfindex = jcas.getIndexRepository().getIndex("org.apache.uima.annotator.calais.Rdf")
+ rdfindex.each { processRdf(it, jcas) } // should only be one
+ }
+
+ def processRdf(rdfTextInstance, jcas) {
+ // 4 passes
+ // pass 1 - create url to node map
+ // pass 2 - create entities
+ // pass 3 - create relations (point to entities)
+ // pass 4 - create instances of entities and relations -
+ // ref entities and relations, and have begin/end refs to text
+
+ // pass 2, 3, and 4 blended and done in 1 iteration.
+ def rdf = new XmlSlurper().parseText(rdfTextInstance.rdfText)
+ // pass 1
+
+ descriptionMap = [ : ]
+ rdf.Description.each {descriptionMap.put(it.@about.toString(), it)}
+
+ if (debugPrint >= 3) {
+ descriptionMap.each {k, v -> println "key = $k, value=$v"}
+ }
+ // pass 2, 3 and 4 blended
+ entityMap = [ : ] // key = url for entity, value = featureStructure for it
+ instances = []
+
+ relationMap = [ : ]
+
+ rdf.Description.each {
+ def typeurl = it.type[0].@resource
+ if (isEntity(typeurl)) {
+ getOrMakeEntity(it.@about.toString(), jcas)
+ } else if (isRelation(typeurl)) {
+ getOrMakeRelation(it.@about.toString(), jcas)
+ } else if (isInstance(typeurl)) {
+ def entityOrRelation = descriptionMap.get(it.subject[0].@resource.toString())
+ def is_entity = isEntity(entityOrRelation.type[0].@resource)
+ def kind = is_entity ? 'Entity' : 'Relation'
+ def instance = newJCasInstance(jcas, "${kind}Instance")
+ instances.add(instance)
+ if (is_entity) {
+ instance.entity = getOrMakeEntity(entityOrRelation.@about.toString(), jcas)
+ } else {
+ instance.relation = getOrMakeRelation(entityOrRelation.@about.toString(), jcas)
+ }
+ instance.begin = Integer.valueOf(it.offset[0].text())
+ instance.end = Integer.valueOf(it.offset[0].text()) + Integer.valueOf(it.length[0].text())
+ instance.addToIndexes()
+ }
+ }
+
+
+ if (debugPrint >= 2) {
+ entityMap.each{k, v -> println "entity key = $k, value=$v"} // test-debug
+ }
+
+ if (debugPrint >= 2) {
+ relationMap.each {k, v -> println "relation: k: $k, v: $v"} // debug test
+ }
+
+ if (debugPrint >= 1) {
+ instances.each {println "instance $it"}
+ }
+
+ descriptionMap = entityMap = relationMap = instances = null
+
+ }
+
+ def lastPart(thing) {
+ def s = thing.toString()
+ s.substring(s.lastIndexOf('/') + 1)
+ }
+
+ def isRelation(url) {
+// println " is relation: ${url.toString()}"
+ return url?.toString()?.startsWith('http://s.opencalais.com/1/type/em/r/')
+ }
+
+ def isEntity(url) {
+// println " is relation: ${url.toString()}"
+ return url?.toString()?.startsWith('http://s.opencalais.com/1/type/em/e/')
+ }
+
+ def isInstance(url) {
+// println " is relation: ${url.toString()}"
+ return (url?.toString() == 'http://s.opencalais.com/1/type/sys/InstanceInfo')
+}
+
+ def newJCasInstance(jcas, type) {
+ def clasz = Class.forName("org.apache.uima.calaisType.$type".toString(), true, jcas.getCas().JCasClassLoader)
+ def constructor = clasz.getDeclaredConstructor(JCas.class);
+ return constructor.newInstance(jcas)
+ // mock as a map
+// return args.clone()
+ }
+
+ def getOrMakeEntity(key, jcas) {
+ def instance = entityMap.get(key)
+ if (instance) {
+ return instance
+ }
+ def description = descriptionMap[key]
+ def typeurl = description.type[0].@resource
+ instance = newJCasInstance(jcas, "entity.${lastPart(typeurl)}")
+ entityMap.put(key, instance)
+ instance.canonicalForm = description.name[0].text()
+ instance.addToIndexes()
+ return instance
+ }
+
+ def getOrMakeRelation(key, jcas) {
+ def instance = relationMap.get(key)
+ if (instance) {
+ return instance
+ }
+ def description = descriptionMap[key]
+ def typeurl = description.type[0].@resource
+ def relationName = lastPart(typeurl)
+ instance = newJCasInstance(jcas, "relation.$relationName")
+ relationMap.put(description.@about.toString(), instance)
+
+ def multiCompany = multiCompanyRelations.contains(relationName)
+ if (multiCompany) {
+ // special handling - make an fs array of all companies
+ def numberOfCompanies = description.company.size()
+ instance.company = new FSArray(jcas, numberOfCompanies)
+ description.company.eachWithIndex {obj, i ->
+ instance.setCompany(i, entityMap.get(obj.@resource.toString()))
+ }
+ }
+ description.children().each {
+ def featureName = it.name()
+ if (featureName == 'type' ||
+ (multiCompany && featureName == 'company')) {
+ return
+ }
+ def resourceurl = it.@resource.toString()
+ if (resourceurl) {
+ def value = getOrMakeEntity(resourceurl, jcas)
+ instance[featureName] = value
+ } else {
+ instance[featureName] = it.text()
+ }
+ }
+ instance.addToIndexes()
+ }
+}
Propchange: incubator/uima/sandbox/trunk/OpenCalaisAnnotatorGroovy/src/main/groovy/org/apache/uima/annotator/calais_groovy/RdfProcessor.groovy
------------------------------------------------------------------------------
svn:eol-style = native