You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2018/03/15 10:35:14 UTC
svn commit: r1826789 - in /uima/ruta/trunk:
ruta-core-ext/src/main/java/org/apache/uima/ruta/action/
ruta-core/src/main/java/org/apache/uima/ruta/
ruta-core/src/main/java/org/apache/uima/ruta/engine/
ruta-core/src/main/java/org/apache/uima/ruta/resourc...
Author: pkluegl
Date: Thu Mar 15 10:35:14 2018
New Revision: 1826789
URL: http://svn.apache.org/viewvc?rev=1826789&view=rev
Log:
UIMA-5736 - applied patch
Added:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java (with props)
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv
Modified:
uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java
Modified: uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java (original)
+++ uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java Thu Mar 15 10:35:14 2018
@@ -129,7 +129,7 @@ public class MarkTableReloadAction exten
Logger.getLogger(this.getClass().getName()).info("Creating Table Word List from resource: " + tableNameValue);
try {
- table = new CSVTable(resource);
+ table = new CSVTable(resource, CSVTable.DEFAULT_CSV_SEPARATOR);
} catch (IOException e) {
Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, "Unable to load CSV file: " + tableNameValue, e);
}
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java Thu Mar 15 10:35:14 2018
@@ -679,6 +679,8 @@ public class RutaEnvironment {
}
public RutaTable getWordTable(String table) {
+ UimaContext context = owner.getContext();
+
RutaTable result = tables.get(table);
if (result == null) {
if (table.endsWith("csv") || table.endsWith("txt") || table.endsWith("tsv")) {
@@ -686,7 +688,7 @@ public class RutaEnvironment {
Resource resource = resourceLoader.getResource(table);
if (resource.exists()) {
try {
- tables.put(table, new CSVTable(resource));
+ tables.put(table, new CSVTable(resource, getCsvSeparator(context)));
} catch (IOException e) {
Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
"Error reading csv table " + table, e);
@@ -696,7 +698,7 @@ public class RutaEnvironment {
}
} else {
try {
- RutaTable rutaTable = (RutaTable) owner.getContext().getResourceObject(table);
+ RutaTable rutaTable = (RutaTable) context.getResourceObject(table);
tables.put(table, rutaTable);
} catch (ResourceAccessException e) {
Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
@@ -708,6 +710,16 @@ public class RutaEnvironment {
return tables.get(table);
}
+ private String getCsvSeparator(UimaContext context) {
+ if (context != null) {
+ String cvsSeparator = (String) context.getConfigParameterValue(RutaEngine.PARAM_CSV_SEPARATOR);
+ if (cvsSeparator != null) {
+ return cvsSeparator;
+ }
+ }
+ return CSVTable.DEFAULT_CSV_SEPARATOR;
+ }
+
private void addVariable(String name, Class<?> type, Class<?> generic) {
variableTypes.put(name, type);
if (generic != null) {
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Thu Mar 15 10:35:14 2018
@@ -73,6 +73,7 @@ import org.apache.uima.ruta.extensions.I
import org.apache.uima.ruta.extensions.RutaExternalFactory;
import org.apache.uima.ruta.parser.RutaLexer;
import org.apache.uima.ruta.parser.RutaParser;
+import org.apache.uima.ruta.resource.CSVTable;
import org.apache.uima.ruta.resource.RutaResourceLoader;
import org.apache.uima.ruta.seed.RutaAnnotationSeeder;
import org.apache.uima.ruta.type.RutaBasic;
@@ -383,6 +384,15 @@ public class RutaEngine extends JCasAnno
private Boolean dictRemoveWS = false;
/**
+ * If this parameter is set, its String value is used as the separator for splitting columns in
+ * CSV tables
+ */
+ public static final String PARAM_CSV_SEPARATOR = "csvSeparator";
+
+ @ConfigurationParameter(name = PARAM_CSV_SEPARATOR, mandatory = false, defaultValue = CSVTable.DEFAULT_CSV_SEPARATOR)
+ private String csvSeparator = CSVTable.DEFAULT_CSV_SEPARATOR;
+
+ /**
* This parameter specifies the names of variables and is used in combination with the parameter
* varValues, which contains the values of the corresponding variables. The n-th entry of this
* string array specifies the variable of the n-th entry of the string array of the parameter
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java Thu Mar 15 10:35:14 2018
@@ -29,6 +29,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
+import java.util.regex.Pattern;
import org.apache.uima.ruta.block.RutaBlock;
import org.apache.uima.ruta.engine.RutaEngine;
@@ -36,18 +37,25 @@ import org.springframework.core.io.FileS
import org.springframework.core.io.Resource;
public class CSVTable implements RutaTable {
+ public static final String DEFAULT_CSV_SEPARATOR = ";";
private List<List<String>> tableData;
private Map<Integer, RutaWordList> columnWordLists = new HashMap<Integer, RutaWordList>(2);
+ private final String separator;
+
/**
* @param table
* A CSV table.
+ * @param selectedSeparator
+ * The separator that should be used to split the columns in the CSV file
+ *
* @throws IOException
* When there is a problem opening, reading or closing the table.
*/
- public CSVTable(Resource table) throws IOException {
+ public CSVTable(Resource table, String selectedSeparator) throws IOException {
+ separator = selectedSeparator;
InputStream stream = null;
try {
stream = table.getInputStream();
@@ -59,12 +67,13 @@ public class CSVTable implements RutaTab
}
}
- public CSVTable(String location) throws IOException {
- this(new FileSystemResource(location));
+ public CSVTable(String location, String selectedSeparator) throws IOException {
+ this(new FileSystemResource(location), selectedSeparator);
}
- public CSVTable(InputStream stream) throws IOException {
+ public CSVTable(InputStream stream, String selectedSeparator) throws IOException {
super();
+ separator = selectedSeparator;
buildTable(stream);
}
@@ -74,8 +83,11 @@ public class CSVTable implements RutaTab
tableData = new ArrayList<List<String>>();
while (sc.hasNext()) {
String line = sc.next().trim();
- line = line.replaceAll(";;", "; ;");
- String[] lineElements = line.split(";");
+ // Quote separator to ignore special characters in regex
+ String quotedSeparator = Pattern.quote(separator);
+ // add a spacer between two consecutive separators that have nothing in between
+ line = line.replaceAll(quotedSeparator + quotedSeparator, separator + " " + separator);
+ String[] lineElements = line.split(quotedSeparator);
List<String> row = Arrays.asList(lineElements);
tableData.add(row);
}
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java?rev=1826789&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java Thu Mar 15 10:35:14 2018
@@ -0,0 +1,54 @@
+package org.apache.uima.ruta.resource;
+
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+public class CSVTableTest {
+ private static final String CUSTOM_SEPARATOR = "#|#";
+
+ @Test
+ public void testDefaultLookup() throws IOException {
+ CSVTable csvTable = new CSVTable(CSVTable.class.getResourceAsStream("test_csvfile.csv"), CSVTable.DEFAULT_CSV_SEPARATOR);
+ checkValue(csvTable,0, 0, "this is the first line first column");
+ checkValue(csvTable,0, 1, "ONE");
+ checkValue(csvTable,1, 0, "this is the second line first column");
+ checkValue(csvTable,1, 1, "TWO");
+ checkValue(csvTable,2, 0, "this is the a line with custom");
+ checkValue(csvTable,2, 1, " non default separator used#|#THREE");
+ }
+
+ @Test
+ public void testDefaultLookupWithEmptyColumn() throws IOException {
+ CSVTable csvTable = new CSVTable(CSVTable.class.getResourceAsStream("test_csvfile.csv"), CSVTable.DEFAULT_CSV_SEPARATOR);
+ checkValue(csvTable,3, 0, "line with empty column");
+ checkValue(csvTable,3, 1, " "); // spacer added by table implementation
+ checkValue(csvTable,3, 2, "AFTER_EMPTY_COLUMN");
+ }
+
+ @Test
+ public void testLookupWithCustomSeparator() throws IOException {
+ CSVTable csvTable = new CSVTable(CSVTable.class.getResourceAsStream("test_csvfile.csv"), CUSTOM_SEPARATOR);
+ checkValue(csvTable,0, 0, "this is the first line first column;ONE");
+ checkValue(csvTable,1, 0, "this is the second line first column;TWO");
+ checkValue(csvTable,2, 0, "this is the a line with custom; non default separator used");
+ checkValue(csvTable,2, 1, "THREE");
+ }
+
+ @Test
+ public void testLookupWithCustomSeparatorAndEmptyColumn() throws IOException {
+ CSVTable csvTable = new CSVTable(CSVTable.class.getResourceAsStream("test_csvfile.csv"), CUSTOM_SEPARATOR);
+ checkValue(csvTable,4, 0, "line with empty column custom separator");
+ checkValue(csvTable,4, 1, " "); // spacer added by table implementation
+ checkValue(csvTable,4, 2, "AFTER_EMPTY_COLUMN2");
+ }
+
+ private void checkValue(CSVTable table, int row, int column, String expectedValue) {
+ String actualValue = table.getEntry(row, column);
+ assertThat(actualValue, is(expectedValue));
+ }
+
+}
\ No newline at end of file
Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv?rev=1826789&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv (added)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv Thu Mar 15 10:35:14 2018
@@ -0,0 +1,5 @@
+this is the first line first column;ONE
+this is the second line first column;TWO
+this is the a line with custom; non default separator used#|#THREE
+line with empty column;;AFTER_EMPTY_COLUMN
+line with empty column custom separator#|##|#AFTER_EMPTY_COLUMN2
\ No newline at end of file