You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/04/03 22:07:03 UTC

[opennlp] branch master updated: OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c635bfa OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)
3c635bfa is described below

commit 3c635bfa7301ea9c9d699761618b285b2523365c
Author: Jeff Zemerick <je...@mtnfog.com>
AuthorDate: Sun Apr 3 18:06:58 2022 -0400

    OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)
---
 .../src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java    | 7 +++++++
 .../src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java      | 5 -----
 opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java  | 1 +
 .../src/main/java/opennlp/tools/tokenize/TokenizerME.java          | 4 ----
 .../src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java  | 4 ----
 5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
index 7e2ba0ce..2dc3754c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
@@ -21,7 +21,14 @@ import opennlp.tools.util.Span;
 
 abstract class AbstractTokenizer implements Tokenizer {
 
+  protected boolean keepNewLines = false;
+
   public String[] tokenize(String s) {
     return Span.spansToStrings(tokenizePos(s), s);
   }
+
+  public void setKeepNewLines(boolean keepNewLines) {
+    this.keepNewLines = keepNewLines;
+  }
+
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
index 08e29915..b9b86c85 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
@@ -28,8 +28,6 @@ import opennlp.tools.util.StringUtil;
  */
 public class SimpleTokenizer extends AbstractTokenizer {
 
-  private boolean keepNewLines = false;
-
   static class CharacterEnum {
     static final CharacterEnum WHITESPACE = new CharacterEnum("whitespace");
     static final CharacterEnum ALPHABETIC = new CharacterEnum("alphabetic");
@@ -112,7 +110,4 @@ public class SimpleTokenizer extends AbstractTokenizer {
     return character == Character.LINE_SEPARATOR || character == Character.LETTER_NUMBER;
   }
 
-  public void setKeepNewLines(boolean keepNewLines) {
-    this.keepNewLines = keepNewLines;
-  }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
index 7564ee3a..92b5e9b8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
@@ -62,4 +62,5 @@ public interface Tokenizer {
    * token as the individuals array elements.
    */
   Span[] tokenizePos(String s);
+
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 541f5021..c64c2355 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -118,7 +118,6 @@ public class TokenizerME extends AbstractTokenizer {
   private List<Double> tokProbs;
 
   private List<Span> newTokens;
-  private boolean keepNewLines = false;
 
   /**
    * Initializes the tokenizer by downloading a default model.
@@ -272,7 +271,4 @@ public class TokenizerME extends AbstractTokenizer {
     return useAlphaNumericOptimization;
   }
 
-  public void setKeepNewLines(boolean keepNewLines) {
-    this.keepNewLines = keepNewLines;
-  }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
index e00c30d4..f918bceb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
@@ -36,7 +36,6 @@ public class WhitespaceTokenizer extends AbstractTokenizer {
    * {@link WhitespaceTokenizer}.
    */
   public static final WhitespaceTokenizer INSTANCE = new WhitespaceTokenizer();
-  private boolean keepNewLines = false;
 
   /**
    * Use the {@link WhitespaceTokenizer#INSTANCE} field to retrieve an instance.
@@ -82,7 +81,4 @@ public class WhitespaceTokenizer extends AbstractTokenizer {
     return character == Character.LINE_SEPARATOR || character == Character.LETTER_NUMBER;
   }
 
-  public void setKeepNewLines(boolean keepNewLines) {
-    this.keepNewLines = keepNewLines;
-  }
 }