You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2018/03/15 10:35:14 UTC

svn commit: r1826789 - in /uima/ruta/trunk: ruta-core-ext/src/main/java/org/apache/uima/ruta/action/ ruta-core/src/main/java/org/apache/uima/ruta/ ruta-core/src/main/java/org/apache/uima/ruta/engine/ ruta-core/src/main/java/org/apache/uima/ruta/resourc...

Author: pkluegl
Date: Thu Mar 15 10:35:14 2018
New Revision: 1826789

URL: http://svn.apache.org/viewvc?rev=1826789&view=rev
Log:
UIMA-5736 - applied patch

Added:
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java   (with props)
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv
Modified:
    uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java

Modified: uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java (original)
+++ uima/ruta/trunk/ruta-core-ext/src/main/java/org/apache/uima/ruta/action/MarkTableReloadAction.java Thu Mar 15 10:35:14 2018
@@ -129,7 +129,7 @@ public class MarkTableReloadAction exten
                 Logger.getLogger(this.getClass().getName()).info("Creating Table Word List from resource: " + tableNameValue);
 
                 try {
-                    table = new CSVTable(resource);
+                    table = new CSVTable(resource, CSVTable.DEFAULT_CSV_SEPARATOR);
                 } catch (IOException e) {
                     Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, "Unable to load CSV file: " + tableNameValue, e);
                 }

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaEnvironment.java Thu Mar 15 10:35:14 2018
@@ -679,6 +679,8 @@ public class RutaEnvironment {
 	}
 
 	public RutaTable getWordTable(String table) {
+		UimaContext context = owner.getContext();
+
 		RutaTable result = tables.get(table);
 		if (result == null) {
 			if (table.endsWith("csv") || table.endsWith("txt") || table.endsWith("tsv")) {
@@ -686,7 +688,7 @@ public class RutaEnvironment {
 				Resource resource = resourceLoader.getResource(table);
 				if (resource.exists()) {
 					try {
-						tables.put(table, new CSVTable(resource));
+						tables.put(table, new CSVTable(resource, getCsvSeparator(context)));
 					} catch (IOException e) {
 						Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
 								"Error reading csv table " + table, e);
@@ -696,7 +698,7 @@ public class RutaEnvironment {
 				}
 			} else {
 				try {
-					RutaTable rutaTable = (RutaTable) owner.getContext().getResourceObject(table);
+					RutaTable rutaTable = (RutaTable) context.getResourceObject(table);
 					tables.put(table, rutaTable);
 				} catch (ResourceAccessException e) {
 					Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
@@ -708,6 +710,16 @@ public class RutaEnvironment {
 		return tables.get(table);
 	}
 
+	private String getCsvSeparator(UimaContext context) { // separator for CSV tables; never null or empty
+		if (context != null) {
+			String csvSeparator = (String) context.getConfigParameterValue(RutaEngine.PARAM_CSV_SEPARATOR);
+			if (csvSeparator != null && !csvSeparator.isEmpty()) { // "" would make CSVTable split between every character
+				return csvSeparator;
+			}
+		}
+		return CSVTable.DEFAULT_CSV_SEPARATOR; // no context or no usable value configured
+	}
+
 	private void addVariable(String name, Class<?> type, Class<?> generic) {
 		variableTypes.put(name, type);
 		if (generic != null) {

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Thu Mar 15 10:35:14 2018
@@ -73,6 +73,7 @@ import org.apache.uima.ruta.extensions.I
 import org.apache.uima.ruta.extensions.RutaExternalFactory;
 import org.apache.uima.ruta.parser.RutaLexer;
 import org.apache.uima.ruta.parser.RutaParser;
+import org.apache.uima.ruta.resource.CSVTable;
 import org.apache.uima.ruta.resource.RutaResourceLoader;
 import org.apache.uima.ruta.seed.RutaAnnotationSeeder;
 import org.apache.uima.ruta.type.RutaBasic;
@@ -383,6 +384,15 @@ public class RutaEngine extends JCasAnno
   private Boolean dictRemoveWS = false;
 
   /**
+   * Separator string used to split each line of a CSV table into columns. If this parameter is not
+   * set, {@link CSVTable#DEFAULT_CSV_SEPARATOR} is used.
+   */
+  public static final String PARAM_CSV_SEPARATOR = "csvSeparator";
+
+  @ConfigurationParameter(name = PARAM_CSV_SEPARATOR, mandatory = false, defaultValue = CSVTable.DEFAULT_CSV_SEPARATOR)
+  private String csvSeparator = CSVTable.DEFAULT_CSV_SEPARATOR;
+
+  /**
    * This parameter specifies the names of variables and is used in combination with the parameter
    * varValues, which contains the values of the corresponding variables. The n-th entry of this
    * string array specifies the variable of the n-th entry of the string array of the parameter

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java?rev=1826789&r1=1826788&r2=1826789&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/CSVTable.java Thu Mar 15 10:35:14 2018
@@ -29,6 +29,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Scanner;
+import java.util.regex.Pattern;
 
 import org.apache.uima.ruta.block.RutaBlock;
 import org.apache.uima.ruta.engine.RutaEngine;
@@ -36,18 +37,25 @@ import org.springframework.core.io.FileS
 import org.springframework.core.io.Resource;
 
 public class CSVTable implements RutaTable {
+  public static final String DEFAULT_CSV_SEPARATOR = ";";
 
   private List<List<String>> tableData;
 
   private Map<Integer, RutaWordList> columnWordLists = new HashMap<Integer, RutaWordList>(2);
 
+  private final String separator;
+
   /**
    * @param table
    *          A CSV table.
+   * @param selectedSeparator
+   *          The separator used to split each line of the CSV file into columns
+   *
    * @throws IOException
    *           When there is a problem opening, reading or closing the table.
    */
-  public CSVTable(Resource table) throws IOException {
+  public CSVTable(Resource table, String selectedSeparator) throws IOException {
+    separator = selectedSeparator;
     InputStream stream = null;
     try {
       stream = table.getInputStream();
@@ -59,12 +67,13 @@ public class CSVTable implements RutaTab
     }
   }
 
-  public CSVTable(String location) throws IOException {
-    this(new FileSystemResource(location));
+  public CSVTable(String location, String selectedSeparator) throws IOException {
+    this(new FileSystemResource(location), selectedSeparator);
   }
 
-  public CSVTable(InputStream stream) throws IOException {
+  public CSVTable(InputStream stream, String selectedSeparator) throws IOException {
     super();
+    separator = selectedSeparator;
     buildTable(stream);
   }
 
@@ -74,8 +83,11 @@ public class CSVTable implements RutaTab
     tableData = new ArrayList<List<String>>();
     while (sc.hasNext()) {
       String line = sc.next().trim();
-      line = line.replaceAll(";;", "; ;");
-      String[] lineElements = line.split(";");
+      // Quote separator to ignore special characters in regex
+      String quotedSeparator = Pattern.quote(separator);
+      // put a single space between two directly adjacent separators so an empty column reads as " "
+      line = line.replaceAll(quotedSeparator + quotedSeparator, separator + " " + separator);
+      String[] lineElements = line.split(quotedSeparator);
       List<String> row = Arrays.asList(lineElements);
       tableData.add(row);
     }

Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java?rev=1826789&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java Thu Mar 15 10:35:14 2018
@@ -0,0 +1,60 @@
+package org.apache.uima.ruta.resource;
+
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+/** Reads test_csvfile.csv through {@link CSVTable} with the default and with a custom separator. */
+public class CSVTableTest {
+    private static final String CUSTOM_SEPARATOR = "#|#";
+
+    @Test
+    public void testDefaultLookup() throws IOException {
+        CSVTable defaultTable = load(CSVTable.DEFAULT_CSV_SEPARATOR);
+        checkValue(defaultTable, 0, 0, "this is the first line first column");
+        checkValue(defaultTable, 0, 1, "ONE");
+        checkValue(defaultTable, 1, 0, "this is the second line first column");
+        checkValue(defaultTable, 1, 1, "TWO");
+        // "#|#" is ordinary cell content when splitting on the default separator
+        checkValue(defaultTable, 2, 0, "this is the a line with custom");
+        checkValue(defaultTable, 2, 1, " non default separator used#|#THREE");
+    }
+
+    @Test
+    public void testDefaultLookupWithEmptyColumn() throws IOException {
+        CSVTable defaultTable = load(CSVTable.DEFAULT_CSV_SEPARATOR);
+        checkValue(defaultTable, 3, 0, "line with empty column");
+        checkValue(defaultTable, 3, 1, " "); // the table turns an empty column into one space
+        checkValue(defaultTable, 3, 2, "AFTER_EMPTY_COLUMN");
+    }
+
+    @Test
+    public void testLookupWithCustomSeparator() throws IOException {
+        CSVTable customTable = load(CUSTOM_SEPARATOR);
+        // ";" is ordinary cell content when splitting on the custom separator
+        checkValue(customTable, 0, 0, "this is the first line first column;ONE");
+        checkValue(customTable, 1, 0, "this is the second line first column;TWO");
+        checkValue(customTable, 2, 0, "this is the a line with custom; non default separator used");
+        checkValue(customTable, 2, 1, "THREE");
+    }
+
+    @Test
+    public void testLookupWithCustomSeparatorAndEmptyColumn() throws IOException {
+        CSVTable customTable = load(CUSTOM_SEPARATOR);
+        checkValue(customTable, 4, 0, "line with empty column custom separator");
+        checkValue(customTable, 4, 1, " "); // the table turns an empty column into one space
+        checkValue(customTable, 4, 2, "AFTER_EMPTY_COLUMN2");
+    }
+
+    private CSVTable load(String separator) throws IOException {
+        return new CSVTable(CSVTable.class.getResourceAsStream("test_csvfile.csv"), separator);
+    }
+
+    private void checkValue(CSVTable table, int row, int column, String expectedValue) {
+        assertThat(table.getEntry(row, column), is(expectedValue));
+    }
+
+}
\ No newline at end of file

Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/CSVTableTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv?rev=1826789&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv (added)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/resource/test_csvfile.csv Thu Mar 15 10:35:14 2018
@@ -0,0 +1,5 @@
+this is the first line first column;ONE
+this is the second line first column;TWO
+this is the a line with custom; non default separator used#|#THREE
+line with empty column;;AFTER_EMPTY_COLUMN
+line with empty column custom separator#|##|#AFTER_EMPTY_COLUMN2
\ No newline at end of file