You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2015/03/31 16:55:58 UTC
svn commit: r1670357 - in /uima/ruta/trunk:
ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/
ruta-core/src/main/java/org/apache/uima/ruta/resource/
ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/
Author: pkluegl
Date: Tue Mar 31 14:55:58 2015
New Revision: 1670357
URL: http://svn.apache.org/r1670357
Log:
UIMA-4277
- refactored dictionary generation
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/SimpleFeatureExpression.java Tue Mar 31 14:55:58 2015
@@ -73,7 +73,8 @@ public class SimpleFeatureExpression ext
public List<Feature> getFeatures(RutaBlock parent) {
if(mr != null) {
typeExpr = mr.getTypeExpression(parent);
- features = mr.getFeatureExpression(parent).getFeatureStringList(parent);
+ FeatureExpression featureExpression = mr.getFeatureExpression(parent);
+ features = featureExpression.getFeatureStringList(parent);
}
List<Feature> result = new ArrayList<Feature>();
Type type = typeExpr.getType(parent);
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java Tue Mar 31 14:55:58 2015
@@ -1314,7 +1314,7 @@ public class MultiTreeWordList implement
return true;
}
- public void createMTWLFile(String path) {
+ /**
+ * Writes this word list to the given file by handing the root node and the path to the
+ * persistence helper.
+ *
+ * @param path
+ * - location of the file to create
+ * @throws IOException
+ * propagated from the persistence helper
+ */
+ public void createMTWLFile(String path) throws IOException {
persistence.createMTWLFile(root, path);
}
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordListPersistence.java Tue Mar 31 14:55:58 2015
@@ -21,11 +21,13 @@ package org.apache.uima.ruta.resource;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
@@ -40,12 +42,13 @@ import org.xml.sax.XMLReader;
public class MultiTreeWordListPersistence {
-
/**
* Reads the XML-File with the specified path and creates a TreeWordList.
*
- * @param root - the root node of the tree
- * @param path - path of the word list
+ * @param root
+ * - the root node of the tree
+ * @param path
+ * - path of the word list
* @throws IOException
*/
public void readMTWL(MultiTextNode root, String path) throws IOException {
@@ -56,34 +59,33 @@ public class MultiTreeWordListPersistenc
* Sniffs the content type for xml type.
*
* @param is
- * the inputStream to sniff. Must support {@link InputStream#markSupported()}
+ * the inputStream to sniff. Must support {@link InputStream#markSupported()}
* @return true if this stream starts with '<?xml'
*/
- public static boolean isSniffedXmlContentType(InputStream is)
- throws IOException {
- if (is == null)
- throw new IOException("Stream is null");
- if (!is.markSupported()){
- throw new IOException("Cannot mark stream. just wrap it in a BufferedInputStream");
- }
- byte[] bytes = new byte[5]; // peek first five letters
- is.mark(5);
- is.read(bytes);
- String prefix = new String(bytes);
- is.reset();
- if ("<?xml".equals(prefix)){
- return true;
- }
- return false;
+ public static boolean isSniffedXmlContentType(InputStream is) throws IOException {
+ if (is == null) {
+ throw new IOException("Stream is null");
+ }
+ if (!is.markSupported()) {
+ throw new IOException("Cannot mark stream. just wrap it in a BufferedInputStream");
+ }
+ byte[] bytes = new byte[5]; // peek first five letters
+ is.mark(bytes.length);
+ // A single read() may legally return fewer bytes than requested, so keep reading
+ // until the buffer is full or the stream ends.
+ int filled = 0;
+ while (filled < bytes.length) {
+ int count = is.read(bytes, filled, bytes.length - filled);
+ if (count <= 0) {
+ break;
+ }
+ filled += count;
+ }
+ is.reset(); // hand the stream back untouched to the caller
+ // "<?xml" is plain ASCII; decode explicitly instead of relying on the platform charset.
+ String prefix = new String(bytes, 0, filled, "US-ASCII");
+ return "<?xml".equals(prefix);
}
public void readMTWL(MultiTextNode root, InputStream stream, String encoding) throws IOException {
try {
InputStream is = new BufferedInputStream(stream); // adds mark/reset support
boolean isXml = isSniffedXmlContentType(is);
- if (!isXml){ // MTWL is encoded
- is = new ZipInputStream(is);
- ((ZipInputStream)is).getNextEntry(); // zip must contain a single entry
+ if (!isXml) { // MTWL is encoded
+ is = new ZipInputStream(is);
+ ((ZipInputStream) is).getNextEntry(); // zip must contain a single entry
}
InputStreamReader streamReader = new InputStreamReader(is, encoding);
TrieXMLEventHandler handler = new TrieXMLEventHandler(root);
@@ -102,25 +104,47 @@ public class MultiTreeWordListPersistenc
}
}
- public void createMTWLFile(MultiTextNode root, String path) {
- createMTWLFile(root, path, "UTF-8");
+ /**
+ * Writes the tree below {@code root} to {@code path}, requesting compressed output and
+ * UTF-8 encoding from the four-argument overload.
+ *
+ * @param root
+ * - the root node of the tree
+ * @param path
+ * - path of the file to create
+ * @throws IOException
+ * propagated from the four-argument overload
+ */
+ public void createMTWLFile(MultiTextNode root, String path) throws IOException {
+ createMTWLFile(root, path, true, "UTF-8");
}
- public void createMTWLFile(MultiTextNode root, String path, String encoding) {
- try {
- FileOutputStream output = new FileOutputStream(path);
- ZipOutputStream zoutput = new ZipOutputStream(output);
- OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
- writer.write("<?xml version=\"1.0\" ?><root>");
- for (MultiTextNode node : root.getChildren().values()) {
- writeTextNode(writer, node);
- }
- writer.write("</root>");
- writer.close();
+ /**
+ * Writes the tree below {@code root} to {@code path} using UTF-8 encoding. Note that
+ * {@code compressed} precedes {@code path} here, unlike in the four-argument overload.
+ *
+ * @param root
+ * - the root node of the tree
+ * @param compressed
+ * - passed on unchanged to the four-argument overload
+ * @param path
+ * - path of the file to create
+ * @throws IOException
+ * propagated from the four-argument overload
+ */
+ public void createMTWLFile(MultiTextNode root, boolean compressed, String path)
+ throws IOException {
+ createMTWLFile(root, path, compressed, "UTF-8");
+ }
- } catch (IOException e) {
- e.printStackTrace();
+ /**
+ * Writes the tree below {@code root} to {@code path}, either zipped or as plain XML.
+ *
+ * @param root
+ * - the root node of the tree
+ * @param path
+ * - path of the file to create
+ * @param compressed
+ * - selects the compressed writer when true, the uncompressed writer otherwise
+ * @param encoding
+ * - character encoding handed to the selected writer
+ * @throws IOException
+ * propagated from the selected writer
+ */
+ public void createMTWLFile(MultiTextNode root, String path, boolean compressed, String encoding)
+ throws IOException {
+ // Handle the uncompressed case first; everything else goes through the ZIP writer.
+ if (!compressed) {
+ writeUncompressedMTWLFile(root, path, encoding);
+ return;
+ }
+ writeCompressedMTWLFile(root, path, encoding);
+ }
+
+ /**
+ * Writes the tree below {@code root} as XML into a ZIP archive at {@code path}. The archive
+ * holds a single entry named after the target file.
+ *
+ * @param root
+ * - the root node of the tree
+ * @param path
+ * - path of the archive to create
+ * @param encoding
+ * - character encoding of the XML text
+ * @throws IOException
+ * if the file cannot be opened, the encoding is unsupported, or writing fails
+ */
+ private void writeCompressedMTWLFile(MultiTextNode root, String path, String encoding)
+ throws IOException {
+ FileOutputStream output = new FileOutputStream(path);
+ try {
+ ZipOutputStream zoutput = new ZipOutputStream(output);
+ // A ZIP stream refuses data until an entry has been opened.
+ zoutput.putNextEntry(new java.util.zip.ZipEntry(new java.io.File(path).getName()));
+ OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
+ // writeMTWLFile closes the writer, which finishes the entry and the archive.
+ writeMTWLFile(root, writer);
+ } finally {
+ output.close(); // releases the handle after a failure; harmless once already closed
+ }
+ }
+
+ /**
+ * Writes the tree below {@code root} as plain XML text to {@code path}.
+ *
+ * @param root
+ * - the root node of the tree
+ * @param path
+ * - path of the file to create
+ * @param encoding
+ * - character encoding of the XML text
+ * @throws IOException
+ * if the file cannot be opened, the encoding is unsupported, or writing fails
+ */
+ private void writeUncompressedMTWLFile(MultiTextNode root, String path, String encoding)
+ throws IOException {
+ FileOutputStream output = new FileOutputStream(path);
+ try {
+ OutputStreamWriter writer = new OutputStreamWriter(output, encoding);
+ writeMTWLFile(root, writer); // closes the writer, and with it the file, on success
+ } finally {
+ output.close(); // releases the handle after a failure; harmless once already closed
+ }
+ }
+
+ /**
+ * Serializes the children of {@code root} between an XML declaration with an opening
+ * {@code <root>} tag and the closing {@code </root>} tag, then closes the writer.
+ *
+ * @param root
+ * - the root node of the tree; only its children are written
+ * @param writer
+ * - destination of the XML text; always closed by this method
+ * @throws IOException
+ * if writing to or closing the writer fails
+ */
+ private void writeMTWLFile(MultiTextNode root, OutputStreamWriter writer) throws IOException {
+ try {
+ writer.write("<?xml version=\"1.0\" ?><root>");
+ for (MultiTextNode node : root.getChildren().values()) {
+ writeTextNode(writer, node);
}
+ writer.write("</root>");
+ } finally {
+ // Close on every path so that a failed write does not leave the stream open.
+ writer.close();
+ }
}
private void writeTextNode(Writer writer, MultiTextNode node) {
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Tue Mar 31 14:55:58 2015
@@ -179,7 +179,7 @@ public class TreeWordList implements Rut
// Create Nodes from all chars of the strings besides the last one
TextNode pointer = root;
for (Character each : s.toCharArray()) {
- if(dictRemoveWS && Character.isWhitespace(each)) {
+ if (dictRemoveWS && Character.isWhitespace(each)) {
continue;
}
TextNode childNode = pointer.getChildNode(each);
@@ -238,17 +238,17 @@ public class TreeWordList implements Rut
int next = ++index;
boolean result = false;
-
+
if (ignoreCase) {
TextNode childNodeL = pointer.getChildNode(Character.toLowerCase(charAt));
TextNode childNodeU = pointer.getChildNode(Character.toUpperCase(charAt));
-
+
TextNode wsNode = pointer.getChildNode(' ');
- if(ignoreWS && wsNode != null) {
- result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
- maxIgnoreChars, ignoreWS);
+ if (ignoreWS && wsNode != null) {
+ result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+ maxIgnoreChars, ignoreWS);
}
-
+
if (childNodeL == null && ignoreWS) {
childNodeL = skipWS(pointer, charAt);
}
@@ -266,11 +266,11 @@ public class TreeWordList implements Rut
}
} else {
TextNode wsNode = pointer.getChildNode(' ');
- if(ignoreWS && wsNode != null) {
- result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
- maxIgnoreChars, ignoreWS);
+ if (ignoreWS && wsNode != null) {
+ result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+ maxIgnoreChars, ignoreWS);
}
-
+
TextNode childNode = pointer.getChildNode(charAt);
if (childNode == null && ignoreWS) {
childNode = skipWS(pointer, charAt);
@@ -366,9 +366,9 @@ public class TreeWordList implements Rut
try {
InputStream is = new BufferedInputStream(stream); // adds mark/reset support
boolean isXml = MultiTreeWordListPersistence.isSniffedXmlContentType(is);
- if (!isXml){ // MTWL is encoded
+ if (!isXml) { // MTWL is encoded
is = new ZipInputStream(is);
- ((ZipInputStream)is).getNextEntry(); // zip must contain a single entry
+ ((ZipInputStream) is).getNextEntry(); // zip must contain a single entry
}
InputStreamReader streamReader = new InputStreamReader(is, encoding);
this.root = new TextNode();
@@ -395,23 +395,22 @@ public class TreeWordList implements Rut
}
}
- public void createXMLFile(String path, String encoding) {
- try {
- FileOutputStream output = new FileOutputStream(path);
- ZipOutputStream zoutput = new ZipOutputStream(output);
- OutputStreamWriter writer = new OutputStreamWriter(zoutput, encoding);
- writer.write("<?xml version=\"1.0\" ?>");
- writer.write("<root>");
- for (TextNode child : root.getChildren().values()) {
- writeNode(writer, child);
- }
- writer.write("</root>");
- writer.close();
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
+ /**
+ * Writes this word list as XML to the given file.
+ *
+ * @param path
+ * - file to create
+ * @param encoding
+ * - character encoding of the XML text
+ * @param compressed
+ * - if true, the XML is stored as the single entry of a ZIP archive, otherwise as plain text
+ * @throws IOException
+ * if the file cannot be opened, the encoding is unsupported, or writing fails
+ */
+ public void createXMLFile(String path, String encoding, boolean compressed) throws IOException {
+ FileOutputStream output = new FileOutputStream(path);
+ try {
+ java.io.OutputStream target = output;
+ if (compressed) {
+ // A ZIP stream refuses data until an entry has been opened.
+ ZipOutputStream zoutput = new ZipOutputStream(output);
+ zoutput.putNextEntry(new java.util.zip.ZipEntry(new java.io.File(path).getName()));
+ target = zoutput;
+ }
+ OutputStreamWriter writer = new OutputStreamWriter(target, encoding);
+ writer.write("<?xml version=\"1.0\" ?>");
+ writer.write("<root>");
+ for (TextNode child : root.getChildren().values()) {
+ writeNode(writer, child);
}
+ writer.write("</root>");
+ writer.close(); // also finishes the ZIP archive, if any, and closes the file
+ } finally {
+ output.close(); // releases the handle after a failure; harmless once already closed
+ }
+ }
+
+ /**
+ * Writes this word list as XML to the given file, requesting compressed output from the
+ * three-argument overload.
+ *
+ * @param path
+ * - file to create
+ * @param encoding
+ * - character encoding of the XML text
+ * @throws IOException
+ * propagated from the three-argument overload
+ */
+ public void createXMLFile(String path, String encoding) throws IOException {
+ // Forward the target path; passing the encoding twice would create a file named after
+ // the charset instead of writing to the requested location.
+ createXMLFile(path, encoding, true);
}
public void writeNode(Writer writer, TextNode node) {
Modified: uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java (original)
+++ uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java Tue Mar 31 14:55:58 2015
@@ -95,7 +95,11 @@ public class MultiTWLConverterHandler im
File file = newPath.toFile();
final String absolutePath = file.getAbsolutePath();
- trie.createMTWLFile(absolutePath);
+ try {
+ trie.createMTWLFile(absolutePath);
+ } catch (IOException e) {
+ RutaAddonsPlugin.error(e);
+ }
IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();
IContainer container = myWorkspaceRoot.getContainerForLocation(parent);
Modified: uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java?rev=1670357&r1=1670356&r2=1670357&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java (original)
+++ uima/ruta/trunk/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java Tue Mar 31 14:55:58 2015
@@ -82,7 +82,11 @@ public class TWLConverterHandler impleme
return Status.CANCEL_STATUS;
}
String exportPath = path.substring(0, path.length() - 3) + "twl";
- list.createXMLFile(exportPath, "UTF-8");
+ try {
+ list.createXMLFile(exportPath, "UTF-8");
+ } catch (IOException e) {
+ RutaAddonsPlugin.error(e);
+ }
IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();
IContainer container = myWorkspaceRoot.getContainerForLocation(file.getLocation());
if (container != null) {