You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@clerezza.apache.org by to...@apache.org on 2010/03/01 14:55:45 UTC

svn commit: r917527 - in /incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator: ./ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/clerezza/ src/main/java/org/apa...

Author: tommaso
Date: Mon Mar  1 13:55:44 2010
New Revision: 917527

URL: http://svn.apache.org/viewvc?rev=917527&view=rev
Log:
[CLEREZZA-111] - added UIMABaseMetadataGenerator which annotates language of text plain documents

Added:
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java   (with props)
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java   (with props)
Modified:
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/   (props changed)
    incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/pom.xml

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Mon Mar  1 13:55:44 2010
@@ -0,0 +1,4 @@
+.settings
+target
+.classpath
+.project

Modified: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/pom.xml
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/pom.xml?rev=917527&r1=917526&r2=917527&view=diff
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/pom.xml (original)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/pom.xml Mon Mar  1 13:55:44 2010
@@ -14,12 +14,6 @@
   </parent>
   <dependencies>
     <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <version>4.7</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>org.apache.clerezza</groupId>
       <artifactId>org.apache.clerezza.platform.content</artifactId>
     </dependency>
@@ -44,5 +38,15 @@
       <groupId>org.osgi</groupId>
       <artifactId>org.osgi.compendium</artifactId>
     </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.clerezza</groupId>
+      <artifactId>org.apache.clerezza.jaxrs.testutils</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file

Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java?rev=917527&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java (added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java Mon Mar  1 13:55:44 2010
@@ -0,0 +1,58 @@
+package org.apache.clerezza.uima.metadatagenerator;
+
+import java.util.List;
+
+import javax.ws.rs.core.MediaType;
+
+import org.apache.clerezza.platform.content.MetaDataGenerator;
+import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.ontologies.DCTERMS;
+import org.apache.clerezza.rdf.utils.GraphNode;
+import org.apache.clerezza.uima.utils.ExternalServicesFacade;
+import org.apache.uima.UIMAException;
+
+/**
+ * A <code>MetaDataGenerator</code> implementation that uses UIMA, through
+ * <code>ExternalServicesFacade</code>, to generate metadata about the given data depending on
+ * its media type. For now <code>generate</code> only adds <code>DCTERMS.language</code>;
+ * <code>addTags</code> is not called from within this class yet.
+ */
+public class UIMABaseMetadataGenerator implements MetaDataGenerator {
+
+  private ExternalServicesFacade facade = new ExternalServicesFacade();
+
+  @Override
+  public void generate(GraphNode node, byte[] data, MediaType mediaType) {
+    // FIXME only TEXT_PLAIN is handled. NOTE(review): getType() is just the primary type ("text"), so equals("text/plain") looks always false - confirm
+    if (MediaType.TEXT_PLAIN.equals(mediaType.getType())) {
+      try {
+        // add the detected language to the node
+        addLanguage(node, data);
+      } catch (Throwable e) {
+        // best effort: any failure (Errors included) is dropped here; nothing is logged or rethrown
+      }
+    }
+  }
+
+  private void addLanguage(GraphNode node, byte[] data) throws UIMAException {
+    // get language to bind to the node. NOTE(review): byte[].toString() is the array identity string, not the decoded text - confirm intent
+    String language = facade.getLanguage(data.toString());
+    addStringLiteral(language, node, DCTERMS.language);
+  }
+
+  private void addTags(GraphNode node, byte[] data) throws UIMAException {
+    // get keywords (tags) to bind to the node; same byte[].toString() caveat as in addLanguage
+    List<String> tags = facade.getTags(data.toString());
+    for (String keyword : tags) {
+      // add each tag to the node as a typed literal
+      // FIXME find the proper UriRef to store tags; until then null is passed as the predicate
+      addStringLiteral(keyword, node, null);
+    }
+  }
+
+  private void addStringLiteral(String value, GraphNode node, UriRef uriRef) {
+    node.addProperty(uriRef, LiteralFactory.getInstance().createTypedLiteral(value));
+  }
+
+}

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/main/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGenerator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java
URL: http://svn.apache.org/viewvc/incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java?rev=917527&view=auto
==============================================================================
--- incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java (added)
+++ incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java Mon Mar  1 13:55:44 2010
@@ -0,0 +1,43 @@
+package org.apache.clerezza.uima.metadatagenerator;
+
+import static org.junit.Assert.fail;
+
+import javax.ws.rs.core.MediaType;
+
+import org.apache.clerezza.rdf.core.MGraph;
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
+import org.apache.clerezza.rdf.utils.GraphNode;
+import org.junit.Test;
+/**
+ * Test case for {@link UIMABaseMetadataGenerator}: checks that construction and a
+ * <code>generate</code> call with a multipart/form-data media type do not throw an Exception.
+ */
+public class UIMABaseMetadataGeneratorTest {
+  
+  @Test
+  public void testConstructor() {
+    try {
+      new UIMABaseMetadataGenerator();
+    }
+    catch (Exception e) {
+      fail(e.getLocalizedMessage());
+    }
+  }
+  
+  @Test
+  public void testGenerateMethodWithUnsupportedMediaType() {
+    try {
+      UIMABaseMetadataGenerator baseMetadataGenerator = new UIMABaseMetadataGenerator();
+      byte[] data = new byte[]{};
+      MGraph mGraph = new SimpleMGraph();
+      GraphNode node = new GraphNode(new UriRef("test"), mGraph.getGraph());
+      MediaType mediaType = MediaType.valueOf("multipart/form-data; boundary=AaB03x");
+      baseMetadataGenerator.generate(node, data, mediaType);
+    } catch (Exception e) {
+      fail(e.getLocalizedMessage());
+    }
+    
+  }
+
+}

Propchange: incubator/clerezza/trunk/org.apache.clerezza.parent/org.apache.clerezza.uima/org.apache.clerezza.uima.metadata-generator/src/test/java/org/apache/clerezza/uima/metadatagenerator/UIMABaseMetadataGeneratorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native