You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2022/01/10 10:57:36 UTC

[uima-ruta] branch UIMA-6383-Ruta-TRIE-Wordlist-entry-not-annotated created (now 8abfb30)

This is an automated email from the ASF dual-hosted git repository.

pkluegl pushed a change to branch UIMA-6383-Ruta-TRIE-Wordlist-entry-not-annotated
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git.


      at 8abfb30  UIMA-6383: Ruta: TRIE - Wordlist entry not annotated

This branch includes the following new commits:

     new 8abfb30  UIMA-6383: Ruta: TRIE - Wordlist entry not annotated

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


[uima-ruta] 01/01: UIMA-6383: Ruta: TRIE - Wordlist entry not annotated

Posted by pk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

pkluegl pushed a commit to branch UIMA-6383-Ruta-TRIE-Wordlist-entry-not-annotated
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git

commit 8abfb30d2315c5f37267e4b7e434e216fc8d44d6
Author: Peter Klügl <pe...@averbis.com>
AuthorDate: Mon Jan 10 11:57:11 2022 +0100

    UIMA-6383: Ruta: TRIE - Wordlist entry not annotated
    
    - add option to select dictRemoveWS in Eclipse plugin and Maven plugin
---
 .../uima/ruta/resource/MultiTreeWordList.java      | 39 +++++++++++++++++++++-
 .../ruta/utils/twl/MultiTWLConverterHandler.java   | 15 +++++++--
 .../uima/ruta/utils/twl/TWLConverterHandler.java   | 17 ++++++++--
 .../ui/preferences/RutaPreferencesMessages.java    | 12 +++----
 .../preferences/RutaPreferencesMessages.properties |  1 +
 .../uima/ruta/ide/core/RutaCorePreferences.java    |  2 ++
 .../ruta/ide/core/RutaPreferenceInitializer.java   |  2 ++
 .../uima/ruta/maven/RutaGenerateMTWLMojo.java      |  8 ++++-
 .../uima/ruta/maven/RutaGenerateTWLMojo.java       |  8 ++++-
 9 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
index 4b6d9ff..fd25685 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/resource/MultiTreeWordList.java
@@ -63,6 +63,8 @@ public class MultiTreeWordList implements RutaWordList {
   /** The cost model we are using. */
   private EditDistanceCostMap costMap;
 
+  private boolean dictRemoveWS = false;
+
   /**
    * Default constructor.
    * 
@@ -155,9 +157,25 @@ public class MultiTreeWordList implements RutaWordList {
    *           When there is a problem reading a path.
    */
   public MultiTreeWordList(String[] pathnames, File base) throws IOException {
+    this(pathnames, base, false);
+  }
+
+  /**
+   * Constructs a MultiTreeWordList from the files at the given path names.
+   * 
+   * @param pathnames
+   *          paths of the files to create the MultiTreeWordList from
+   * @param base
+   *          - the relative base
+   * @param dictRemoveWS
+   *          remove white spaces
+   * @throws IOException
+   *           When there is a problem reading a path.
+   */
+  public MultiTreeWordList(String[] pathnames, File base, boolean dictRemoveWS) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();
-
+    this.dictRemoveWS = dictRemoveWS;
     if (pathnames == null) {
       return;
     }
@@ -177,8 +195,23 @@ public class MultiTreeWordList implements RutaWordList {
    *           - When there is a problem reading the files.
    */
   public MultiTreeWordList(List<File> files, File base) throws IOException {
+    this(files, base, false);
+  }
+
+  /**
+   * @param files
+   *          - the input files
+   * @param base
+   *          - the relative base
+   * @param dictRemoveWS
+   *          - remove white spaces
+   * @throws IOException
+   *           - When there is a problem reading the files.
+   */
+  public MultiTreeWordList(List<File> files, File base, boolean dictRemoveWS) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();
+    this.dictRemoveWS = dictRemoveWS;
 
     if (files == null) {
       return;
@@ -275,6 +308,10 @@ public class MultiTreeWordList implements RutaWordList {
 
     for (Character each : s.toCharArray()) {
 
+      if (dictRemoveWS && Character.isWhitespace(each)) {
+        continue;
+      }
+
       MultiTextNode childNode = pointer.getChildNode(each);
 
       if (childNode == null) {
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
index 8c3fbcd..2decfcd 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/MultiTWLConverterHandler.java
@@ -54,12 +54,16 @@ import org.eclipse.ui.handlers.HandlerUtil;
 public class MultiTWLConverterHandler implements IHandler {
   private class ConverterHandlerJob extends Job {
     ExecutionEvent event;
+
     private boolean compress;
 
-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+    private boolean dictRemoveWS;
+
+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
       super("Converting...");
       this.event = event;
       this.compress = compress;
+      this.dictRemoveWS = dictRemoveWS;
       setUser(true);
     }
 
@@ -126,16 +130,20 @@ public class MultiTWLConverterHandler implements IHandler {
     }
   }
 
+  @Override
   public void addHandlerListener(IHandlerListener handlerListener) {
   }
 
+  @Override
   public void dispose() {
   }
 
+  @Override
   public Object execute(ExecutionEvent event) throws ExecutionException {
     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
-    new ConverterHandlerJob(event, compress).schedule();
+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
     return null;
   }
 
@@ -161,14 +169,17 @@ public class MultiTWLConverterHandler implements IHandler {
     return paths;
   }
 
+  @Override
   public boolean isEnabled() {
     return true;
   }
 
+  @Override
   public boolean isHandled() {
     return true;
   }
 
+  @Override
   public void removeHandlerListener(IHandlerListener handlerListener) {
 
   }
diff --git a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
index 45a7016..cd2784e 100755
--- a/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
+++ b/ruta-ep-addons/src/main/java/org/apache/uima/ruta/utils/twl/TWLConverterHandler.java
@@ -50,12 +50,16 @@ public class TWLConverterHandler implements IHandler {
 
   private class ConverterHandlerJob extends Job {
     ExecutionEvent event;
+
     private boolean compress;
 
-    ConverterHandlerJob(ExecutionEvent event, boolean compress) {
+    private boolean dictRemoveWS;
+
+    ConverterHandlerJob(ExecutionEvent event, boolean compress, boolean dictRemoveWS) {
       super("Converting...");
       this.event = event;
       this.compress = compress;
+      this.dictRemoveWS = dictRemoveWS;
       setUser(true);
     }
 
@@ -81,7 +85,7 @@ public class TWLConverterHandler implements IHandler {
         String path = file.getRawLocation().toString();
         TreeWordList list;
         try {
-          list = new TreeWordList(path, false);
+          list = new TreeWordList(path, dictRemoveWS);
         } catch (IOException e) {
           RutaAddonsPlugin.error(e);
           return Status.CANCEL_STATUS;
@@ -109,27 +113,34 @@ public class TWLConverterHandler implements IHandler {
     }
   }
 
+  @Override
   public void addHandlerListener(IHandlerListener handlerListener) {
   }
 
+  @Override
   public void dispose() {
   }
 
+  @Override
   public Object execute(ExecutionEvent event) throws ExecutionException {
     IPreferenceStore preferenceStore = RutaIdeUIPlugin.getDefault().getPreferenceStore();
     boolean compress = preferenceStore.getBoolean(RutaCorePreferences.COMPRESS_WORDLISTS);
-    new ConverterHandlerJob(event, compress).schedule();
+    boolean dictRemoveWS = preferenceStore.getBoolean(RutaCorePreferences.DICT_REMOVE_WS);
+    new ConverterHandlerJob(event, compress, dictRemoveWS).schedule();
     return null;
   }
 
+  @Override
   public boolean isEnabled() {
     return true;
   }
 
+  @Override
   public boolean isHandled() {
     return true;
   }
 
+  @Override
   public void removeHandlerListener(IHandlerListener handlerListener) {
 
   }
diff --git a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
index 5503f03..8219a1e 100644
--- a/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
+++ b/ruta-ep-ide-ui/src/main/java/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.java
@@ -22,8 +22,7 @@ package org.apache.uima.ruta.ide.ui.preferences;
 import org.eclipse.osgi.util.NLS;
 
 public class RutaPreferencesMessages extends NLS {
-  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$	
-
+  private static final String BUNDLE_NAME = "org.apache.uima.ruta.ide.ui.preferences.RutaPreferencesMessages";//$NON-NLS-1$
 
   private RutaPreferencesMessages() {
     // Do not instantiate
@@ -78,12 +77,13 @@ public class RutaPreferencesMessages extends NLS {
   public static String ProjectClearOutput;
 
   public static String NoVMInDevMode;
-  
+
   public static String AddSDI;
-  
+
   public static String CompressWordLists;
-  
+
+  public static String DictRemoveWS;
+
   public static String DefaultCasSerializationFormat;
 
-  
 }
diff --git a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
index b1b237f..7ae71a5 100644
--- a/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
+++ b/ruta-ep-ide-ui/src/main/resources/org/apache/uima/ruta/ide/ui/preferences/RutaPreferencesMessages.properties
@@ -44,4 +44,5 @@ ProjectClearOutput = Clear output folder before running a script.
 NoVMInDevMode = Do not start a VM in development mode.
 AddSDI = Update Source Document Information when launching a script.
 CompressWordLists = Compress generated twl/mtwl word lists.
+DictRemoveWS = Remove white spaces when generating twl/mtwl word lists.
 DefaultCasSerializationFormat = Default CAS serialization format:
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
index e6dec76..4d1e86d 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaCorePreferences.java
@@ -34,5 +34,7 @@ public class RutaCorePreferences {
 
   public static final String COMPRESS_WORDLISTS = "CompressWordLists";
 
+  public static final String DICT_REMOVE_WS = "dictRemoveWS";
+
   public static final String DEFAULT_CAS_SERIALIZATION_FORMAT = "DefaultCasSerializationFormat";
 }
diff --git a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
index 1217bf3..138a528 100644
--- a/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
+++ b/ruta-ep-ide/src/main/java/org/apache/uima/ruta/ide/core/RutaPreferenceInitializer.java
@@ -28,6 +28,7 @@ public class RutaPreferenceInitializer extends AbstractPreferenceInitializer {
   public RutaPreferenceInitializer() {
   }
 
+  @Override
   public void initializeDefaultPreferences() {
     IPreferenceStore store = RutaIdeCorePlugin.getDefault().getPreferenceStore();
     // TaskTagUtils.initializeDefaultValues(store);
@@ -38,6 +39,7 @@ public class RutaPreferenceInitializer extends AbstractPreferenceInitializer {
     store.setDefault(RutaCorePreferences.NO_VM_IN_DEV_MODE, false);
     store.setDefault(RutaCorePreferences.ADD_SDI, false);
     store.setDefault(RutaCorePreferences.COMPRESS_WORDLISTS, false);
+    store.setDefault(RutaCorePreferences.DICT_REMOVE_WS, false);
   }
 
 }
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
index 254824c..93ebc1f 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateMTWLMojo.java
@@ -76,6 +76,12 @@ public class RutaGenerateMTWLMojo extends AbstractMojo {
   private boolean compress;
 
   /**
+   * Remove white spaces while generating dictionaries.
+   */
+  @Parameter(defaultValue = "true", required = true)
+  private boolean dictRemoveWS;
+
+  /**
    * Fail on error.
    */
   @Parameter(defaultValue = "true", required = true)
@@ -107,7 +113,7 @@ public class RutaGenerateMTWLMojo extends AbstractMojo {
 
     MultiTreeWordList trie = null;
     try {
-      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()));
+      trie = new MultiTreeWordList(files, new File(inputFiles.getDirectory()), dictRemoveWS);
     } catch (IOException e) {
       handleError("Error creating MTWL file.", e);
     }
diff --git a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
index 961014e..b10ab3c 100644
--- a/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
+++ b/ruta-maven-plugin/src/main/java/org/apache/uima/ruta/maven/RutaGenerateTWLMojo.java
@@ -78,6 +78,12 @@ public class RutaGenerateTWLMojo extends AbstractMojo {
   private boolean compress;
 
   /**
+   * Remove white spaces while generating dictionaries.
+   */
+  @Parameter(defaultValue = "true", required = true)
+  private boolean dictRemoveWS;
+
+  /**
    * Fail on error.
    */
   @Parameter(defaultValue = "true", required = true)
@@ -111,7 +117,7 @@ public class RutaGenerateTWLMojo extends AbstractMojo {
       File outputFile = each.getValue();
       TreeWordList list = null;
       try {
-        list = new TreeWordList(inputFile.getAbsolutePath(), false);
+        list = new TreeWordList(inputFile.getAbsolutePath(), dictRemoveWS);
       } catch (IOException e) {
         handleError("Error generating twl.", e);
       }