You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jz...@apache.org on 2022/04/03 22:07:03 UTC
[opennlp] branch master updated: OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)
This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new 3c635bfa OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)
3c635bfa is described below
commit 3c635bfa7301ea9c9d699761618b285b2523365c
Author: Jeff Zemerick <je...@mtnfog.com>
AuthorDate: Sun Apr 3 18:06:58 2022 -0400
OPENNLP-1364: Moving keepNewLines to AbstractTokenizer. (#409)
---
.../src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java | 7 +++++++
.../src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java | 5 -----
opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java | 1 +
.../src/main/java/opennlp/tools/tokenize/TokenizerME.java | 4 ----
.../src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java | 4 ----
5 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
index 7e2ba0ce..2dc3754c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/AbstractTokenizer.java
@@ -21,7 +21,14 @@ import opennlp.tools.util.Span;
abstract class AbstractTokenizer implements Tokenizer {
+ protected boolean keepNewLines = false;
+
public String[] tokenize(String s) {
return Span.spansToStrings(tokenizePos(s), s);
}
+
+ public void setKeepNewLines(boolean keepNewLines) {
+ this.keepNewLines = keepNewLines;
+ }
+
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
index 08e29915..b9b86c85 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/SimpleTokenizer.java
@@ -28,8 +28,6 @@ import opennlp.tools.util.StringUtil;
*/
public class SimpleTokenizer extends AbstractTokenizer {
- private boolean keepNewLines = false;
-
static class CharacterEnum {
static final CharacterEnum WHITESPACE = new CharacterEnum("whitespace");
static final CharacterEnum ALPHABETIC = new CharacterEnum("alphabetic");
@@ -112,7 +110,4 @@ public class SimpleTokenizer extends AbstractTokenizer {
return character == Character.LINE_SEPARATOR || character == Character.LETTER_NUMBER;
}
- public void setKeepNewLines(boolean keepNewLines) {
- this.keepNewLines = keepNewLines;
- }
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
index 7564ee3a..92b5e9b8 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/Tokenizer.java
@@ -62,4 +62,5 @@ public interface Tokenizer {
* token as the individuals array elements.
*/
Span[] tokenizePos(String s);
+
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 541f5021..c64c2355 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -118,7 +118,6 @@ public class TokenizerME extends AbstractTokenizer {
private List<Double> tokProbs;
private List<Span> newTokens;
- private boolean keepNewLines = false;
/**
* Initializes the tokenizer by downloading a default model.
@@ -272,7 +271,4 @@ public class TokenizerME extends AbstractTokenizer {
return useAlphaNumericOptimization;
}
- public void setKeepNewLines(boolean keepNewLines) {
- this.keepNewLines = keepNewLines;
- }
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
index e00c30d4..f918bceb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/WhitespaceTokenizer.java
@@ -36,7 +36,6 @@ public class WhitespaceTokenizer extends AbstractTokenizer {
* {@link WhitespaceTokenizer}.
*/
public static final WhitespaceTokenizer INSTANCE = new WhitespaceTokenizer();
- private boolean keepNewLines = false;
/**
* Use the {@link WhitespaceTokenizer#INSTANCE} field to retrieve an instance.
@@ -82,7 +81,4 @@ public class WhitespaceTokenizer extends AbstractTokenizer {
return character == Character.LINE_SEPARATOR || character == Character.LETTER_NUMBER;
}
- public void setKeepNewLines(boolean keepNewLines) {
- this.keepNewLines = keepNewLines;
- }
}