You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ko...@apache.org on 2018/10/15 07:02:56 UTC

[opennlp] branch revert-329-OPENNLP-1214 created (now bf0d13b)

This is an automated email from the ASF dual-hosted git repository.

koji pushed a change to branch revert-329-OPENNLP-1214
in repository https://gitbox.apache.org/repos/asf/opennlp.git.


      at bf0d13b  Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"

This branch includes the following new commits:

     new bf0d13b  Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[opennlp] 01/01: Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"

Posted by ko...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

koji pushed a commit to branch revert-329-OPENNLP-1214
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit bf0d13b4b0c5304e1134ac295a9c1cb3767c3dcc
Author: Koji Sekiguchi <ko...@rondhuit.com>
AuthorDate: Mon Oct 15 16:02:50 2018 +0900

    Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"
    
    This reverts commit 51cbde659383c8931525a51cf5066092826082ae.
---
 .../sentdetect/DefaultEndOfSentenceScanner.java    | 26 +++-------
 .../sentdetect/DefaultSDContextGenerator.java      | 26 +++++-----
 .../tools/sentdetect/EndOfSentenceScanner.java     |  8 ---
 .../sentdetect/DefaultSDContextGeneratorTest.java  | 59 ----------------------
 4 files changed, 22 insertions(+), 97 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index 2b8c0be..75d0ec0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -19,9 +19,7 @@
 package opennlp.tools.sentdetect;
 
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 /**
  * Default implementation of the {@link EndOfSentenceScanner}.
@@ -30,9 +28,7 @@ import java.util.Set;
  */
 public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
 
-  private Set<Character> eosCharacters;
-  @Deprecated
-  private char[] eosChars;
+  private char[] eosCharacters;
 
   /**
    * Initializes the current instance.
@@ -40,11 +36,7 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
    * @param eosCharacters
    */
   public DefaultEndOfSentenceScanner(char[] eosCharacters) {
-    this.eosCharacters = new HashSet<>();
-    for (char eosChar: eosCharacters) {
-      this.eosCharacters.add(eosChar);
-    }
-    this.eosChars = eosCharacters;
+    this.eosCharacters = eosCharacters;
   }
 
   public List<Integer> getPositions(String s) {
@@ -57,21 +49,19 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
 
   public List<Integer> getPositions(char[] cbuf) {
     List<Integer> l = new ArrayList<>();
+    char[] eosCharacters = getEndOfSentenceCharacters();
     for (int i = 0; i < cbuf.length; i++) {
-      if (eosCharacters.contains(cbuf[i])) {
-        l.add(i);
+      for (char eosCharacter : eosCharacters) {
+        if (cbuf[i] == eosCharacter) {
+          l.add(i);
+          break;
+        }
       }
     }
     return l;
   }
 
-  @Deprecated
   public char[] getEndOfSentenceCharacters() {
-    return eosChars;
-  }
-
-  @Override
-  public Set<Character> getEOSCharacters() {
     return eosCharacters;
   }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
index 8c2822b..a29119b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
@@ -19,7 +19,6 @@ package opennlp.tools.sentdetect;
 
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -43,7 +42,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
 
   private Set<String> inducedAbbreviations;
 
-  private Set<Character> eosCharacters;
+  private char[] eosCharacters;
 
   /**
    * Creates a new <code>SDContextGenerator</code> instance with
@@ -67,10 +66,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
    */
   public DefaultSDContextGenerator(Set<String> inducedAbbreviations, char[] eosCharacters) {
     this.inducedAbbreviations = inducedAbbreviations;
-    this.eosCharacters = new HashSet<>();
-    for (char eosChar: eosCharacters) {
-      this.eosCharacters.add(eosChar);
-    }
+    this.eosCharacters = eosCharacters;
     buf = new StringBuffer();
     collectFeats = new ArrayList<>();
   }
@@ -125,9 +121,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
     int c = position;
     { ///assign prefix, stop if you run into a period though otherwise stop at space
       while (--c > prefixStart) {
-        if (eosCharacters.contains(sb.charAt(c))) {
-          prefixStart = c;
-          c++; // this gets us out of while loop.
+        for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+          if (sb.charAt(c) == eosCharacters[eci]) {
+            prefixStart = c;
+            c++; // this gets us out of while loop.
+            break;
+          }
         }
       }
       prefix = String.valueOf(sb.subSequence(prefixStart, position)).trim();
@@ -139,9 +138,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
     {
       c = position;
       while (++c < suffixEnd) {
-        if (eosCharacters.contains(sb.charAt(c))) {
-          suffixEnd = c;
-          c--; // this gets us out of while loop.
+        for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+          if (sb.charAt(c) == eosCharacters[eci]) {
+            suffixEnd = c;
+            c--; // this gets us out of while loop.
+            break;
+          }
         }
       }
     }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
index 7963e37..b48ad3f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
@@ -18,7 +18,6 @@
 package opennlp.tools.sentdetect;
 
 import java.util.List;
-import java.util.Set;
 
 /**
  * Scans Strings, StringBuffers, and char[] arrays for the offsets of
@@ -35,16 +34,9 @@ public interface EndOfSentenceScanner {
    * Returns an array of character which can indicate the end of a sentence.
    * @return an array of character which can indicate the end of a sentence.
    */
-  @Deprecated
   char[] getEndOfSentenceCharacters();
 
   /**
-   * Returns a set of character which can indicate the end of a sentence.
-   * @return a set of character which can indicate the end of a sentence.
-   */
-  Set<Character> getEOSCharacters();
-
-  /**
    * The receiver scans the specified string for sentence ending characters and
    * returns their offsets.
    *
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
deleted file mode 100644
index f010498..0000000
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.sentdetect;
-
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import opennlp.tools.sentdetect.lang.Factory;
-
-public class DefaultSDContextGeneratorTest {
-
-  @Test
-  public void testGetContext() throws Exception {
-    SDContextGenerator sdContextGenerator =
-        new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);
-
-    String[] context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
-    Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/"), context);
-
-    context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
-    Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
-  }
-
-  @Test
-  public void testGetContextWithAbbreviations() throws Exception {
-    SDContextGenerator sdContextGenerator =
-        new DefaultSDContextGenerator(new HashSet<>(Arrays.asList("Mr./Inc.".split("/"))),
-            Factory.defaultEosCharacters);
-
-    String[] context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
-    Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/"), context);
-
-    context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
-    Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
-  }
-}