You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by to...@apache.org on 2010/04/28 19:45:25 UTC

svn commit: r939025 - in /incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src: main/java/org/apache/clerezza/uima/utils/ main/resources/ test/java/org/apache/clerezza/uima/utils/ test/resources/

Author: tommaso
Date: Wed Apr 28 17:45:25 2010
New Revision: 939025

URL: http://svn.apache.org/viewvc?rev=939025&view=rev
Log:
[CLEREZZA-124] - added category recognition of a text using UIMA and AlchemyAPI

Added:
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml   (with props)
Modified:
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml

Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/java/org/apache/clerezza/uima/utils/ExternalServicesFacade.java Wed Apr 28 17:45:25 2010
@@ -1,6 +1,7 @@
 package org.apache.clerezza.uima.utils;
 
 import org.apache.uima.UIMAException;
+import org.apache.uima.alchemy.ts.categorization.Category;
 import org.apache.uima.alchemy.ts.keywords.KeywordFS;
 import org.apache.uima.alchemy.ts.language.LanguageFS;
 import org.apache.uima.cas.FeatureStructure;
@@ -37,7 +38,7 @@ public class ExternalServicesFacade {
       JCas jcas = uimaExecutor.getResults();
 
       // get AlchemyAPI keywords extracted using UIMA
-      keywords = UIMAUtils.getAllFSofType(KeywordFS.type, jcas);
+      keywords.addAll(UIMAUtils.getAllFSofType(KeywordFS.type, jcas));
 
     } catch (Exception e) {
       throw new UIMAException(e);
@@ -83,7 +84,7 @@ public class ExternalServicesFacade {
       JCas jcas = uimaExecutor.getResults();
 
       // extract entities using OpenCalaisAnnotator
-      calaisAnnotations = UIMAUtils.getAllAnnotationsOfType(org.apache.uima.calais.BaseType.type, jcas);
+      calaisAnnotations.addAll(UIMAUtils.getAllAnnotationsOfType(org.apache.uima.calais.BaseType.type, jcas));
 
     } catch (Exception e) {
       throw new UIMAException(e);
@@ -91,6 +92,28 @@ public class ExternalServicesFacade {
     return calaisAnnotations;
   }
 
+  /**
+   * Analyzes the given document with the "TextCategorizationAEDescriptor.xml" descriptor
+   * and returns the "text" feature of the resulting Category feature structure.
+   *
+   * @param document the text handed to the UIMA executor for analysis
+   * @return the string value of the Category feature structure's "text" feature
+   * @throws UIMAException wrapping any exception raised during analysis or extraction
+   */
+  public String getCategory(String document) throws UIMAException {
+    try {
+      // run the categorization descriptor with the current parameter settings
+      uimaExecutor.analyzeDocument(document, "TextCategorizationAEDescriptor.xml", getParameterSetting());
+      JCas analysisResult = uimaExecutor.getResults();
+
+      // fetch the Category feature structure produced by the AlchemyAPI annotator
+      FeatureStructure categoryStructure = UIMAUtils.getSingletonFeatureStructure(Category.type, analysisResult);
+      return categoryStructure.getStringValue(categoryStructure.getType().getFeatureByBaseName("text"));
+    } catch (Exception cause) {
+      throw new UIMAException(cause);
+    }
+  }
+
   public Map<String, Object> getParameterSetting() {
     return parameterSetting;
   }

Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/ExtServicesAE.xml Wed Apr 28 17:45:25 2010
@@ -12,6 +12,9 @@
     <delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
       <import location="TextLanguageDetectionAEDescriptor.xml"/>
     </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
+      <import location="TextCategorizationAEDescriptor.xml"/>
+    </delegateAnalysisEngine>
   </delegateAnalysisEngineSpecifiers>
   <analysisEngineMetaData>
     <name>ExtServicesAE</name>
@@ -25,6 +28,7 @@
         <node>OpenCalaisAnnotator</node>
         <node>TextKeywordExtractionAEDescriptor</node>
         <node>TextLanguageDetectionAEDescriptor</node>
+        <node>TextCategorizationAEDescriptor</node>
       </fixedFlow>
     </flowConstraints>
     <fsIndexCollection/>

Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml?rev=939025&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml (added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml Wed Apr 28 17:45:25 2010
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    	Licensed to the Apache Software Foundation (ASF) under one
+    	or more contributor license agreements.  See the NOTICE file
+    	distributed with this work for additional information
+    	regarding copyright ownership.  The ASF licenses this file
+    	to you under the Apache License, Version 2.0 (the
+    	"License"); you may not use this file except in compliance
+    	with the License.  You may obtain a copy of the License at
+
+    	http://www.apache.org/licenses/LICENSE-2.0
+
+    	Unless required by applicable law or agreed to in writing,
+    	software distributed under the License is distributed on an
+    	"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    	KIND, either express or implied.  See the License for the
+    	specific language governing permissions and limitations
+    	under the License.
+
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.alchemy.annotator.TextCategorizationAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>TextCategorizationAEDescriptor</name>
+    <description/>
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>apikey</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>outputMode</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>baseUrl</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>outputMode</name>
+        <value>
+          <string>xml</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>apikey</name>
+        <value>
+          <string>AA_API_KEY</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <types>
+        <typeDescription>
+          <name>org.apache.uima.alchemy.ts.categorization.Category</name>
+          <description/>
+          <supertypeName>uima.cas.TOP</supertypeName>
+          <features>
+            <featureDescription>
+              <name>score</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+            <featureDescription>
+              <name>text</name>
+              <description/>
+              <rangeTypeName>uima.cas.String</rangeTypeName>
+            </featureDescription>
+          </features>
+        </typeDescription>
+      </types>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
    svn:keywords = Date Revision Author HeadURL Id

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/main/resources/TextCategorizationAEDescriptor.xml
------------------------------------------------------------------------------
    svn:mime-type = text/xml

Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/java/org/apache/clerezza/uima/utils/ExternalServicesFacadeTest.java Wed Apr 28 17:45:25 2010
@@ -18,6 +18,8 @@ public class ExternalServicesFacadeTest 
 
     private static final String AN_ENGLISH_TEXT = "this is a document supposed to be recognized as written in the language of Queen Elizabeth";
 
+    private static final String CLEREZZA_RELATED_TEXT = "Clerezza is fully based on OSGi. OSGi is a very lightweight approach to offer the modularization and dynamism missing in standard Java. By using OSGi services it can also interoperate with Spring-DS or Peaberry applications";
+
     @Test
     public void getLanguageTest() {
         try {
@@ -77,4 +79,19 @@ public class ExternalServicesFacadeTest 
         }
     }
 
+    /** Checks that getCategory reports "computer_internet" for CLEREZZA_RELATED_TEXT. */
+    @Test
+    public void getCategoryTest() {
+        try {
+            ExternalServicesFacade externalServicesFacade = new ExternalServicesFacade();
+            Map<String, Object> parameterSettings = new HashMap<String, Object>();
+            // NOTE(review): API key is hard-coded in the test source - consider externalizing it
+            parameterSettings.put("apikey", "04490000a72fe7ec5cb3497f14e77f338c86f2fe");
+            externalServicesFacade.setParameterSetting(parameterSettings);
+            assertEquals("computer_internet", externalServicesFacade.getCategory(CLEREZZA_RELATED_TEXT)); // expected first, then actual
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail(e.getLocalizedMessage());
+        }
+    }
+
 }

Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml?rev=939025&r1=939024&r2=939025&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.utils/src/test/resources/ExtServicesAE.xml Wed Apr 28 17:45:25 2010
@@ -12,6 +12,9 @@
     <delegateAnalysisEngine key="TextLanguageDetectionAEDescriptor">
       <import name="TextLanguageDetectionAEDescriptor"/>
     </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="TextCategorizationAEDescriptor">
+      <import name="TextCategorizationAEDescriptor"/>
+    </delegateAnalysisEngine>
   </delegateAnalysisEngineSpecifiers>
   <analysisEngineMetaData>
     <name>ExtServicesAE</name>
@@ -25,6 +28,7 @@
         <node>OpenCalaisAnnotator</node>
         <node>TextKeywordExtractionAEDescriptor</node>
         <node>TextLanguageDetectionAEDescriptor</node>
+        <node>TextCategorizationAEDescriptor</node>
       </fixedFlow>
     </flowConstraints>
     <fsIndexCollection/>