You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ko...@apache.org on 2018/10/15 07:02:57 UTC
[opennlp] 01/01: Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"
This is an automated email from the ASF dual-hosted git repository.
koji pushed a commit to branch revert-329-OPENNLP-1214
in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit bf0d13b4b0c5304e1134ac295a9c1cb3767c3dcc
Author: Koji Sekiguchi <ko...@rondhuit.com>
AuthorDate: Mon Oct 15 16:02:50 2018 +0900
Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"
This reverts commit 51cbde659383c8931525a51cf5066092826082ae.
---
.../sentdetect/DefaultEndOfSentenceScanner.java | 26 +++-------
.../sentdetect/DefaultSDContextGenerator.java | 26 +++++-----
.../tools/sentdetect/EndOfSentenceScanner.java | 8 ---
.../sentdetect/DefaultSDContextGeneratorTest.java | 59 ----------------------
4 files changed, 22 insertions(+), 97 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index 2b8c0be..75d0ec0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -19,9 +19,7 @@
package opennlp.tools.sentdetect;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
-import java.util.Set;
/**
* Default implementation of the {@link EndOfSentenceScanner}.
@@ -30,9 +28,7 @@ import java.util.Set;
*/
public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
- private Set<Character> eosCharacters;
- @Deprecated
- private char[] eosChars;
+ private char[] eosCharacters;
/**
* Initializes the current instance.
@@ -40,11 +36,7 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
* @param eosCharacters
*/
public DefaultEndOfSentenceScanner(char[] eosCharacters) {
- this.eosCharacters = new HashSet<>();
- for (char eosChar: eosCharacters) {
- this.eosCharacters.add(eosChar);
- }
- this.eosChars = eosCharacters;
+ this.eosCharacters = eosCharacters;
}
public List<Integer> getPositions(String s) {
@@ -57,21 +49,19 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
public List<Integer> getPositions(char[] cbuf) {
List<Integer> l = new ArrayList<>();
+ char[] eosCharacters = getEndOfSentenceCharacters();
for (int i = 0; i < cbuf.length; i++) {
- if (eosCharacters.contains(cbuf[i])) {
- l.add(i);
+ for (char eosCharacter : eosCharacters) {
+ if (cbuf[i] == eosCharacter) {
+ l.add(i);
+ break;
+ }
}
}
return l;
}
- @Deprecated
public char[] getEndOfSentenceCharacters() {
- return eosChars;
- }
-
- @Override
- public Set<Character> getEOSCharacters() {
return eosCharacters;
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
index 8c2822b..a29119b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
@@ -19,7 +19,6 @@ package opennlp.tools.sentdetect;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -43,7 +42,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
private Set<String> inducedAbbreviations;
- private Set<Character> eosCharacters;
+ private char[] eosCharacters;
/**
* Creates a new <code>SDContextGenerator</code> instance with
@@ -67,10 +66,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
*/
public DefaultSDContextGenerator(Set<String> inducedAbbreviations, char[] eosCharacters) {
this.inducedAbbreviations = inducedAbbreviations;
- this.eosCharacters = new HashSet<>();
- for (char eosChar: eosCharacters) {
- this.eosCharacters.add(eosChar);
- }
+ this.eosCharacters = eosCharacters;
buf = new StringBuffer();
collectFeats = new ArrayList<>();
}
@@ -125,9 +121,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
int c = position;
{ ///assign prefix, stop if you run into a period though otherwise stop at space
while (--c > prefixStart) {
- if (eosCharacters.contains(sb.charAt(c))) {
- prefixStart = c;
- c++; // this gets us out of while loop.
+ for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+ if (sb.charAt(c) == eosCharacters[eci]) {
+ prefixStart = c;
+ c++; // this gets us out of while loop.
+ break;
+ }
}
}
prefix = String.valueOf(sb.subSequence(prefixStart, position)).trim();
@@ -139,9 +138,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
{
c = position;
while (++c < suffixEnd) {
- if (eosCharacters.contains(sb.charAt(c))) {
- suffixEnd = c;
- c--; // this gets us out of while loop.
+ for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+ if (sb.charAt(c) == eosCharacters[eci]) {
+ suffixEnd = c;
+ c--; // this gets us out of while loop.
+ break;
+ }
}
}
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
index 7963e37..b48ad3f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
@@ -18,7 +18,6 @@
package opennlp.tools.sentdetect;
import java.util.List;
-import java.util.Set;
/**
* Scans Strings, StringBuffers, and char[] arrays for the offsets of
@@ -35,16 +34,9 @@ public interface EndOfSentenceScanner {
* Returns an array of character which can indicate the end of a sentence.
* @return an array of character which can indicate the end of a sentence.
*/
- @Deprecated
char[] getEndOfSentenceCharacters();
/**
- * Returns a set of character which can indicate the end of a sentence.
- * @return a set of character which can indicate the end of a sentence.
- */
- Set<Character> getEOSCharacters();
-
- /**
* The receiver scans the specified string for sentence ending characters and
* returns their offsets.
*
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
deleted file mode 100644
index f010498..0000000
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.sentdetect;
-
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import opennlp.tools.sentdetect.lang.Factory;
-
-public class DefaultSDContextGeneratorTest {
-
- @Test
- public void testGetContext() throws Exception {
- SDContextGenerator sdContextGenerator =
- new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);
-
- String[] context = sdContextGenerator.getContext(
- "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
- Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/"), context);
-
- context = sdContextGenerator.getContext(
- "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
- Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
- }
-
- @Test
- public void testGetContextWithAbbreviations() throws Exception {
- SDContextGenerator sdContextGenerator =
- new DefaultSDContextGenerator(new HashSet<>(Arrays.asList("Mr./Inc.".split("/"))),
- Factory.defaultEosCharacters);
-
- String[] context = sdContextGenerator.getContext(
- "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
- Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/"), context);
-
- context = sdContextGenerator.getContext(
- "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
- Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
- }
-}