You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by to...@apache.org on 2010/04/28 19:45:25 UTC
svn commit: r939025 - in
/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src:
main/java/org/apache/clerezza/uima/utils/ main/resources/
test/java/org/apache/clerezza/uima/utils/ test/resources/
Author: tommaso
Date: Wed Apr 28 17:45:25 2010
New Revision: 939025
URL: http://svn.apache.org/viewvc?rev=939025&view=rev
Log:
[CLEREZZA-124] - added category recognition of a text using UIMA and AlchemyAPI
Added:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml (with props)
Modified:
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java
incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java Wed Apr 28 17:45:25 2010
@@ -1,6 +1,7 @@
package org.apache.clerezza.uima.utils;
import org.apache.uima.UIMAException;
+import org.apache.uima.alchemy.ts.categorization.Category;
import org.apache.uima.alchemy.ts.keywords.KeywordFS;
import org.apache.uima.alchemy.ts.language.LanguageFS;
import org.apache.uima.cas.FeatureStructure;
@@ -37,7 +38,7 @@ public class ExternalServicesFacade {
JCas jcas = uimaExecutor.getResults();
// get AlchemyAPI keywords extracted using UIMA
- keywords = UIMAUtils.getAllFSofType(KeywordFS.type, jcas);
+ keywords.addAll(UIMAUtils.getAllFSofType(KeywordFS.type, jcas));
} catch (Exception e) {
throw new UIMAException(e);
@@ -83,7 +84,7 @@ public class ExternalServicesFacade {
JCas jcas = uimaExecutor.getResults();
// extract entities using OpenCalaisAnnotator
- calaisAnnotations = UIMAUtils.getAllAnnotationsOfType(org.apache.uima.calais.BaseType.type, jcas);
+ calaisAnnotations.addAll(UIMAUtils.getAllAnnotationsOfType(org.apache.uima.calais.BaseType.type, jcas));
} catch (Exception e) {
throw new UIMAException(e);
@@ -91,6 +92,28 @@ public class ExternalServicesFacade {
return calaisAnnotations;
}
+    /**
+     * Categorizes the given text by analyzing it with the TextCategorizationAEDescriptor.xml descriptor.
+     *
+     * @param document the text to categorize
+     * @return the value of the "text" feature of the extracted Category feature structure
+     * @throws UIMAException wrapping any exception raised while analyzing or reading the results
+     */
+    public String getCategory(String document) throws UIMAException {
+        try {
+            // run the categorization descriptor with the currently configured parameters
+            uimaExecutor.analyzeDocument(document, "TextCategorizationAEDescriptor.xml", getParameterSetting());
+            // fetch the CAS holding the results of the analysis
+            JCas analyzedCas = uimaExecutor.getResults();
+
+            // read the "text" feature of the single Category feature structure
+            FeatureStructure categoryFS = UIMAUtils.getSingletonFeatureStructure(Category.type, analyzedCas);
+            return categoryFS.getStringValue(categoryFS.getType().getFeatureByBaseName("text"));
+        } catch (Exception e) {
+            throw new UIMAException(e);
+        }
+    }
+
public Map<String, Object> getParameterSetting() {
return parameterSetting;
}
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml Wed Apr 28 17:45:25 2010
@@ -12,6 +12,9 @@
<delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
<import location="TextLanguageDetectionAEDescriptor.xml"/>
</delegateAnalysisEngine>
+ <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
+ <import location="TextCategorizationAEDescriptor.xml"/>
+ </delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
<name>ExtServicesAE</name>
@@ -25,6 +28,7 @@
<node>OpenCalaisAnnotator</node>
<node>TextKeywordExtractionAEDescriptor</node>
<node>TextLanguageDetectionAEDescriptor</node>
+ <node>TextCategorizationAEDescriptor</node>
</fixedFlow>
</flowConstraints>
<fsIndexCollection/>
Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml?rev=939025&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml (added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml Wed Apr 28 17:45:25 2010
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextCategorizationAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>TextCategorizationAEDescriptor</name>
+ <description/>
+ <version>1.0</version>
+ <vendor/>
+ <configurationParameters>
+ <configurationParameter>
+ <name>apikey</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>outputMode</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>baseUrl</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>outputMode</name>
+ <value>
+ <string>xml</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>apikey</name>
+ <value>
+ <string>AA_API_KEY</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <types>
+ <typeDescription>
+ <name>org.apache.uima.alchemy.ts.categorization.Category</name>
+ <description/>
+ <supertypeName>uima.cas.TOP</supertypeName>
+ <features>
+ <featureDescription>
+ <name>score</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ <featureDescription>
+ <name>text</name>
+ <description/>
+ <rangeTypeName>uima.cas.String</rangeTypeName>
+ </featureDescription>
+ </features>
+ </typeDescription>
+ </types>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
svn:keywords = Date Revision Author HeadURL Id
Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
svn:mime-type = text/xml
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java Wed Apr 28 17:45:25 2010
@@ -18,6 +18,8 @@ public class ExternalServicesFacadeTest
private static final String AN_ENGLISH_TEXT = "this is a document supposed to be recognized as written in the language of Queen Elizabeth";
+ private static final String CLEREZZA_RELATED_TEXT = "Clerezza is fully based on OSGi. OSGi is a very lightweight approach to offer the modularization and dynamism missing in standard Java. By using OSGi services it can also interoperate with Spring-DS or Peaberry applications";
+
@Test
public void getLanguageTest() {
try {
@@ -77,4 +79,19 @@ public class ExternalServicesFacadeTest
}
}
+    @Test
+    public void getCategoryTest() {
+        try {
+            ExternalServicesFacade externalServicesFacade = new ExternalServicesFacade();
+            Map<String, Object> parameterSettings = new HashMap<String, Object>();
+            parameterSettings.put("apikey", "04490000a72fe7ec5cb3497f14e77f338c86f2fe");
+            externalServicesFacade.setParameterSetting(parameterSettings);
+            // JUnit's assertEquals takes (expected, actual): the expected value goes first so failure messages read correctly
+            assertEquals("computer_internet", externalServicesFacade.getCategory(CLEREZZA_RELATED_TEXT));
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getLocalizedMessage());
+        }
+    }
+
}
Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml Wed Apr 28 17:45:25 2010
@@ -12,6 +12,9 @@
<delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
<import name="TextLanguageDetectionAEDescriptor"/>
</delegateAnalysisEngine>
+ <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
+ <import name="TextCategorizationAEDescriptor"/>
+ </delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
<name>ExtServicesAE</name>
@@ -25,6 +28,7 @@
<node>OpenCalaisAnnotator</node>
<node>TextKeywordExtractionAEDescriptor</node>
<node>TextLanguageDetectionAEDescriptor</node>
+ <node>TextCategorizationAEDescriptor</node>
</fixedFlow>
</flowConstraints>
<fsIndexCollection/>