You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2015/03/31 16:55:58 UTC

svn commit: r1670357 - in /uima/ruta/trunk: ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/ ruta-core/src/main/java/org/apache/uima/ruta/resource/ ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/

Author: pkluegl
Date: Tue Mar 31 14:55:58 2015
New Revision: 1670357

URL: http://svn.apache.org/r1670357
Log:
UIMA-4277
- refactored dictionary generation

Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
    uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
    uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java Tue Mar 31 14:55:58 2015
@@ -73,7 +73,8 @@ public class SimpleFeatureExpression ext
   public List<Feature> getFeatures(RutaBlock parent) {
     if(mr != null) {
       typeExpr = mr.getTypeExpression(parent);
-      features = mr.getFeatureExpression(parent).getFeatureStringList(parent);
+      FeatureExpression featureExpression = mr.getFeatureExpression(parent);
+      features = featureExpression.getFeatureStringList(parent);
     }
     List<Feature> result = new ArrayList<Feature>();
     Type type = typeExpr.getType(parent);

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java Tue Mar 31 14:55:58 2015
@@ -1314,7 +1314,7 @@ public class MultiTreeWordList implement
     return true;
   }
 
-  public void createMTWLFile(String path) {
+  /**
+   * Writes this word list to the given file by delegating to the persistence helper with this
+   * list's root node.
+   * 
+   * @param path
+   *          - path of the file to create
+   * @throws IOException
+   *           if the persistence helper fails to write the file
+   */
+  public void createMTWLFile(String path) throws IOException {
     persistence.createMTWLFile(root, path);
   }
 

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java Tue Mar 31 14:55:58 2015
@@ -21,11 +21,13 @@ package org.apache.uima.ruta.resource;
 
 import java.io.BufferedInputStream;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
 import java.io.Writer;
 import java.util.zip.ZipInputStream;
 import java.util.zip.ZipOutputStream;
@@ -40,12 +42,13 @@ import org.xml.sax.XMLReader;
 
 public class MultiTreeWordListPersistence {
 
- 
   /**
    * Reads the XML-File with the specified path and creates a TreeWordList.
    * 
-   * @param root - the root node of the tree
-   * @param path - path of the word list
+   * @param root
+   *          - the root node of the tree
+   * @param path
+   *          - path of the word list
    * @throws IOException
    */
   public void readMTWL(MultiTextNode root, String path) throws IOException {
@@ -56,34 +59,33 @@ public class MultiTreeWordListPersistenc
    * Sniffs the content type for xml type.
    * 
    * @param is
-   *            the inputStream to sniff. Must support {@link InputStream#markSupported()}
+   *          the inputStream to sniff. Must support {@link InputStream#markSupported()}
    * @return true if this stream starts with '<?xml'
    */
-  public static boolean isSniffedXmlContentType(InputStream is)
-          throws IOException {
-      if (is == null)
-          throw new IOException("Stream is null");
-      if (!is.markSupported()){
-          throw new IOException("Cannot mark stream. just wrap it in a BufferedInputStream");
-      }
-      byte[] bytes = new byte[5]; // peek first five letters
-      is.mark(5);
-      is.read(bytes);
-      String prefix = new String(bytes);
-      is.reset();
-      if ("<?xml".equals(prefix)){
-          return true;
-      }
-      return false;
+  public static boolean isSniffedXmlContentType(InputStream is) throws IOException {
+    if (is == null) {
+      throw new IOException("Stream is null");
+    }
+    if (!is.markSupported()) {
+      throw new IOException("Cannot mark stream. just wrap it in a BufferedInputStream");
+    }
+    byte[] bytes = new byte[5]; // peek first five letters
+    is.mark(bytes.length);
+    // A single read() may legally return fewer bytes than requested, so keep reading until the
+    // buffer is full or the stream ends.
+    int filled = 0;
+    while (filled < bytes.length) {
+      int count = is.read(bytes, filled, bytes.length - filled);
+      if (count < 0) {
+        break;
+      }
+      filled += count;
+    }
+    // Rewind so that the caller sees the stream from its very first byte again.
+    is.reset();
+    if (filled < bytes.length) {
+      return false; // too short to start with the XML declaration
+    }
+    // Decode with a fixed charset: the platform default may not map these bytes to "<?xml".
+    return "<?xml".equals(new String(bytes, "US-ASCII"));
   }
 
   public void readMTWL(MultiTextNode root, InputStream stream, String encoding) throws IOException {
     try {
       InputStream is = new BufferedInputStream(stream); // adds mark/reset support
       boolean isXml = isSniffedXmlContentType(is);
-      if (!isXml){ // MTWL is encoded
-          is = new ZipInputStream(is);
-          ((ZipInputStream)is).getNextEntry(); // zip must contain a single entry
+      if (!isXml) { // MTWL is encoded
+        is = new ZipInputStream(is);
+        ((ZipInputStream) is).getNextEntry(); // zip must contain a single entry
       }
       InputStreamReader streamReader = new InputStreamReader(is, encoding);
       TrieXMLEventHandler handler = new TrieXMLEventHandler(root);
@@ -102,25 +104,47 @@ public class MultiTreeWordListPersistenc
     }
   }
 
-  public void createMTWLFile(MultiTextNode root, String path) {
-    createMTWLFile(root, path, "UTF-8");
+  /**
+   * Writes the tree below the given root to a file, requesting compressed output and the UTF-8
+   * encoding.
+   * 
+   * @param root
+   *          - the root node of the tree
+   * @param path
+   *          - path of the file to create
+   * @throws IOException
+   *           if the file cannot be written
+   */
+  public void createMTWLFile(MultiTextNode root, String path) throws IOException {
+    createMTWLFile(root, path, true, "UTF-8");
   }
 
-  public void createMTWLFile(MultiTextNode root, String path, String encoding) {
-    try {
-      FileOutputStream output = new FileOutputStream(path);
-      ZipOutputStream zoutput = new ZipOutputStream(output);
-      OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
-      writer.write("<?xml version=\"1.0\" ?><root>");
-      for (MultiTextNode node : root.getChildren().values()) {
-        writeTextNode(writer, node);
-      }
-      writer.write("</root>");
-      writer.close();
+  /**
+   * Writes the tree below the given root to a file using the UTF-8 encoding.
+   * <p>
+   * Note the parameter order: here the compression flag comes before the path, whereas the
+   * four-argument overload takes the path first.
+   * 
+   * @param root
+   *          - the root node of the tree
+   * @param compressed
+   *          - passed on unchanged as the compression flag of the four-argument overload
+   * @param path
+   *          - path of the file to create
+   * @throws IOException
+   *           if the file cannot be written
+   */
+  public void createMTWLFile(MultiTextNode root, boolean compressed, String path)
+          throws IOException {
+    createMTWLFile(root, path, compressed, "UTF-8");
+  }
 
-    } catch (IOException e) {
-      e.printStackTrace();
+  /**
+   * Writes the tree below the given root to a file, dispatching to the compressed or the
+   * uncompressed writer.
+   * 
+   * @param root
+   *          - the root node of the tree
+   * @param path
+   *          - path of the file to create
+   * @param compressed
+   *          - selects the compressed writer when true, the uncompressed writer otherwise
+   * @param encoding
+   *          - name of the character encoding used for the written text
+   * @throws IOException
+   *           if the selected writer fails
+   */
+  public void createMTWLFile(MultiTextNode root, String path, boolean compressed, String encoding)
+          throws IOException {
+    if (!compressed) {
+      writeUncompressedMTWLFile(root, path, encoding);
+      return;
+    }
+    writeCompressedMTWLFile(root, path, encoding);
+  }
+
+  /**
+   * Writes the tree below the given root as XML into a zip archive with a single entry.
+   * 
+   * @param root
+   *          - the root node of the tree
+   * @param path
+   *          - path of the archive to create
+   * @param encoding
+   *          - name of the character encoding used for the XML text
+   * @throws IOException
+   *           if the archive cannot be created or written
+   */
+  private void writeCompressedMTWLFile(MultiTextNode root, String path, String encoding)
+          throws IOException {
+    FileOutputStream output = new FileOutputStream(path);
+    ZipOutputStream zoutput = new ZipOutputStream(output);
+    try {
+      // A zip stream rejects data until an entry has been opened. readMTWL consumes the first
+      // entry of the archive, so one entry named after the file is all that is needed.
+      zoutput.putNextEntry(new java.util.zip.ZipEntry(new java.io.File(path).getName()));
+      OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
+      writeMTWLFile(root, writer);
+    } finally {
+      // No-op after a successful write (the writer already closed the stream); releases the
+      // file handle when the encoding is unsupported or writing fails.
+      zoutput.close();
+    }
+  }
+
+  /**
+   * Writes the tree below the given root as plain XML text.
+   * 
+   * @param root
+   *          - the root node of the tree
+   * @param path
+   *          - path of the file to create
+   * @param encoding
+   *          - name of the character encoding used for the XML text
+   * @throws IOException
+   *           if the file cannot be created or written
+   */
+  private void writeUncompressedMTWLFile(MultiTextNode root, String path, String encoding)
+          throws IOException {
+    FileOutputStream output = new FileOutputStream(path);
+    try {
+      OutputStreamWriter writer = new OutputStreamWriter(output, encoding);
+      writeMTWLFile(root, writer);
+    } finally {
+      // No-op after a successful write (the writer already closed the stream); releases the
+      // file handle when the encoding is unsupported or writing fails.
+      output.close();
+    }
+  }
+
+  /**
+   * Serializes the children of the given root between an XML declaration and a root element,
+   * then closes the writer.
+   * 
+   * @param root
+   *          - the root node whose children are written
+   * @param writer
+   *          - the target writer; it is always closed by this method, even if writing fails
+   * @throws IOException
+   *           if writing to or closing the writer fails
+   */
+  private void writeMTWLFile(MultiTextNode root, OutputStreamWriter writer) throws IOException {
+    try {
+      writer.write("<?xml version=\"1.0\" ?><root>");
+      for (MultiTextNode node : root.getChildren().values()) {
+        writeTextNode(writer, node);
+      }
+      writer.write("</root>");
+    } finally {
+      // Close on every path so a failed write does not leak the underlying stream.
+      writer.close();
     }
   }
 
   private void writeTextNode(Writer writer, MultiTextNode node) {

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Tue Mar 31 14:55:58 2015
@@ -179,7 +179,7 @@ public class TreeWordList implements Rut
     // Create Nodes from all chars of the strings besides the last one
     TextNode pointer = root;
     for (Character each : s.toCharArray()) {
-      if(dictRemoveWS && Character.isWhitespace(each)) {
+      if (dictRemoveWS && Character.isWhitespace(each)) {
         continue;
       }
       TextNode childNode = pointer.getChildNode(each);
@@ -238,17 +238,17 @@ public class TreeWordList implements Rut
     int next = ++index;
 
     boolean result = false;
-    
+
     if (ignoreCase) {
       TextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));
       TextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));
-      
+
       TextNode wsNode = pointer.getChildNode(' ');
-      if(ignoreWS && wsNode != null) {
-          result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
-                  maxIgnoreChars, ignoreWS);
+      if (ignoreWS && wsNode != null) {
+        result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+                maxIgnoreChars, ignoreWS);
       }
-      
+
       if (childNodeL == null && ignoreWS) {
         childNodeL = skipWS(pointer, charAt);
       }
@@ -266,11 +266,11 @@ public class TreeWordList implements Rut
       }
     } else {
       TextNode wsNode = pointer.getChildNode(' ');
-      if(ignoreWS && wsNode != null) {
-          result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
-                  maxIgnoreChars, ignoreWS);
+      if (ignoreWS && wsNode != null) {
+        result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+                maxIgnoreChars, ignoreWS);
       }
-      
+
       TextNode childNode = pointer.getChildNode(charAt);
       if (childNode == null && ignoreWS) {
         childNode = skipWS(pointer, charAt);
@@ -366,9 +366,9 @@ public class TreeWordList implements Rut
     try {
       InputStream is = new BufferedInputStream(stream); // adds mark/reset support
       boolean isXml = MultiTreeWordListPersistence.isSniffedXmlContentType(is);
-      if (!isXml){ // MTWL is encoded
+      if (!isXml) { // MTWL is encoded
         is = new ZipInputStream(is);
-        ((ZipInputStream)is).getNextEntry(); // zip must contain a single entry
+        ((ZipInputStream) is).getNextEntry(); // zip must contain a single entry
       }
       InputStreamReader streamReader = new InputStreamReader(is, encoding);
       this.root = new TextNode();
@@ -395,23 +395,22 @@ public class TreeWordList implements Rut
     }
   }
 
-  public void createXMLFile(String path, String encoding) {
-    try {
-      FileOutputStream output = new FileOutputStream(path);
-      ZipOutputStream zoutput = new ZipOutputStream(output);
-      OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
-      writer.write("<?xml version=\"1.0\" ?>");
-      writer.write("<root>");
-      for (TextNode child : root.getChildren().values()) {
-        writeNode(writer, child);
-      }
-      writer.write("</root>");
-      writer.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
+  /**
+   * Writes this word list as XML to the given file.
+   * 
+   * @param path
+   *          - path of the file to create
+   * @param encoding
+   *          - name of the character encoding used for the XML text
+   * @param compressed
+   *          - if true, the XML is stored as the single entry of a zip archive; otherwise it is
+   *          written as plain text. The stream-based reader in this class sniffs the content for
+   *          the XML declaration and handles both forms.
+   * @throws IOException
+   *           if the file cannot be created or written
+   */
+  public void createXMLFile(String path, String encoding, boolean compressed) throws IOException {
+    java.io.OutputStream target = new FileOutputStream(path);
+    try {
+      if (compressed) {
+        ZipOutputStream zoutput = new ZipOutputStream(target);
+        // A zip stream rejects data until an entry has been opened.
+        zoutput.putNextEntry(new java.util.zip.ZipEntry(new java.io.File(path).getName()));
+        target = zoutput;
+      }
+      OutputStreamWriter writer = new OutputStreamWriter(target, encoding);
+      writer.write("<?xml version=\"1.0\" ?>");
+      writer.write("<root>");
+      for (TextNode child : root.getChildren().values()) {
+        writeNode(writer, child);
+      }
+      writer.write("</root>");
+      writer.close();
+    } finally {
+      // No-op after a successful write; releases the file handle when anything above fails.
+      target.close();
     }
+  }
+
+  /**
+   * Writes this word list as XML to the given file, delegating to the three-argument overload
+   * with the compression flag set to true.
+   * 
+   * @param path
+   *          - path of the file to create
+   * @param encoding
+   *          - name of the character encoding used for the XML text
+   * @throws IOException
+   *           if the file cannot be created or written
+   */
+  public void createXMLFile(String path, String encoding) throws IOException {
+    // The first argument is the target path; passing the encoding here would create a file
+    // named after the encoding instead of the requested one.
+    createXMLFile(path, encoding, true);
   }
 
   public void writeNode(Writer writer, TextNode node) {

Modified: uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java (original)
+++ uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java Tue Mar 31 14:55:58 2015
@@ -95,7 +95,11 @@ public class MultiTWLConverterHandler im
           File file = newPath.toFile();
           final String absolutePath = file.getAbsolutePath();
 
-          trie.createMTWLFile(absolutePath);
+          try {
+            trie.createMTWLFile(absolutePath);
+          } catch (IOException e) {
+            RutaAddonsPlugin.error(e);
+          }
 
           IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();
           IContainer container = myWorkspaceRoot.getContainerForLocation(parent);

Modified: uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java (original)
+++ uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java Tue Mar 31 14:55:58 2015
@@ -82,7 +82,11 @@ public class TWLConverterHandler impleme
           return Status.CANCEL_STATUS;
         }
         String exportPath = path.substring(0, path.length() - 3) + "twl";
-        list.createXMLFile(exportPath, "UTF-8");
+        try {
+          list.createXMLFile(exportPath, "UTF-8");
+        } catch (IOException e) {
+          RutaAddonsPlugin.error(e);
+        }
         IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();
         IContainer container = myWorkspaceRoot.getContainerForLocation(file.getLocation());
         if (container != null) {