You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/12/20 10:27:05 UTC

[opennlp] branch master updated: OPENNLP-1168: Resolve concurrency issue in POS tagger

This is an automated email from the ASF dual-hosted git repository.

joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new c2f1b68  OPENNLP-1168: Resolve concurrency issue in POS tagger
c2f1b68 is described below

commit c2f1b685abecfc11de76ffd0a28771f41b566782
Author: nisc4377 <ex...@esolutions.de>
AuthorDate: Thu Dec 14 11:30:22 2017 +0100

    OPENNLP-1168: Resolve concurrency issue in POS tagger
---
 .../tools/postag/DefaultPOSContextGenerator.java   |   7 +-
 .../postag/DefaultPOSContextGeneratorTest.java     | 173 +++++++++++++++++++++
 2 files changed, 176 insertions(+), 4 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
index 3035ca5..3f4fe97 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
@@ -43,7 +43,6 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
   private Object wordsKey;
 
   private Dictionary dict;
-  private String[] dictGram;
 
   /**
    * Initializes the current instance.
@@ -62,7 +61,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
    */
   public DefaultPOSContextGenerator(int cacheSize, Dictionary dict) {
     this.dict = dict;
-    dictGram = new String[1];
+
     if (cacheSize > 0) {
       contextsCache = new Cache<>(cacheSize);
     }
@@ -148,8 +147,8 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
     e.add("default");
     // add the word itself
     e.add("w=" + lex);
-    dictGram[0] = lex;
-    if (dict == null || !dict.contains(new StringList(dictGram))) {
+
+    if (dict == null || !dict.contains(new StringList(lex))) {
       // do some basic suffix analysis
       String[] suffs = getSuffixes(lex);
       for (int i = 0; i < suffs.length; i++) {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java
new file mode 100644
index 0000000..450bb2c
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+
+/**
+ *
+ * We encountered a concurrency issue in the POS tagger module, in the class
+ * DefaultPOSContextGenerator.
+ *
+ * The issue is demonstrated in DefaultPOSContextGeneratorTest.java. The test "multithreading()"
+ * consistently failed on our system with the code prior to the fix if the number of threads
+ * (NUMBER_OF_THREADS) is set to 10. If the number of threads is set to 1 (effectively disabling
+ * multithreading), the test consistently passes.
+ *
+ * We resolved the issue by removing a field in DefaultPOSContextGenerator.java.
+ *
+ */
+
+
+public class DefaultPOSContextGeneratorTest {
+
+  /** Size of the worker pool used by the multithreading test. */
+  public static final int NUMBER_OF_THREADS = 10;
+  private static Object[] tokens;
+  private static DefaultPOSContextGenerator defaultPOSContextGenerator;
+  private static String[] tags;
+
+  @BeforeClass
+  public static void setUp() {
+    final String matchingToken = "tokenC";
+
+    tokens = new Object[] {"tokenA", "tokenB", matchingToken, "tokenD"};
+    // only the matching token is put into the dictionary
+    final StringList stringList = new StringList(new String[] {matchingToken});
+
+    Dictionary dictionary = new Dictionary();
+    dictionary.put(stringList);
+
+    defaultPOSContextGenerator = new DefaultPOSContextGenerator(dictionary);
+
+    tags = new String[] {"tagA", "tagB", "tagC", "tagD"};
+  }
+
+  @Test
+  public void noDictionaryMatch() {
+    int index = 1;
+    // "tokenB" is not in the dictionary, so suffix and prefix features are expected
+    final String[] actual = defaultPOSContextGenerator.getContext(index, tokens, tags);
+
+    final String[] expected = new String[] {
+        "default",
+        "w=tokenB",
+        "suf=B",
+        "suf=nB",
+        "suf=enB",
+        "suf=kenB",
+        "pre=t",
+        "pre=to",
+        "pre=tok",
+        "pre=toke",
+        "c",
+        "p=tokenA",
+        "t=tagA",
+        "pp=*SB*",
+        "n=tokenC",
+        "nn=tokenD"
+    };
+
+    Assert.assertArrayEquals("Calling with not matching index at: " + index +
+        "\nexpected \n" + Arrays.toString(expected) + " but actually was \n"
+        + Arrays.toString(actual), expected, actual);
+  }
+
+  @Test
+  public void dictionaryMatch() {
+    int indexWithDictionaryMatch = 2;
+    // "tokenC" is in the dictionary, so no suffix or prefix features are expected
+    final String[] actual =
+        defaultPOSContextGenerator.getContext(indexWithDictionaryMatch, tokens, tags);
+
+    final String[] expected = new String[] {
+        "default",
+        "w=tokenC",
+        "p=tokenB",
+        "t=tagB",
+        "pp=tokenA",
+        "t2=tagA,tagB",
+        "n=tokenD",
+        "nn=*SE*"
+    };
+
+    Assert.assertArrayEquals("Calling with index matching dictionary entry at: "
+        + indexWithDictionaryMatch + "\nexpected \n" + Arrays.toString(expected)
+        + " but actually was \n" + Arrays.toString(actual), expected, actual);
+  }
+  /**
+   * Runs both single-threaded checks concurrently on the one shared generator instance,
+   * so that any state shared between calls shows up as a failed assertion in a worker.
+   */
+  @Test
+  public void multithreading() {
+    Callable<Void> matching = () -> {
+      dictionaryMatch();
+      return null;
+    };
+
+    Callable<Void> notMatching = () -> {
+      noDictionaryMatch();
+      return null;
+    };
+
+    final List<Callable<Void>> callables = IntStream.range(0, 200000)
+        .mapToObj(index -> (index % 2 == 0) ? matching : notMatching)
+        .collect(Collectors.toList());
+
+    final ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
+
+    try {
+      // invokeAll blocks until every task has completed
+      final List<Future<Void>> futures = executorService.invokeAll(callables);
+
+      executorService.shutdown();
+      executorService.awaitTermination(30, TimeUnit.SECONDS);
+
+      for (Future<Void> future : futures) {
+        future.get();
+      }
+    } catch (final InterruptedException e) {
+      // an InterruptedException carries no cause here, so getCause() must not be dereferenced
+      Thread.currentThread().interrupt();
+      Assert.fail("Test interrupted");
+    } catch (final ExecutionException ee) {
+      // attach the worker's failure as the cause so its stack trace is reported
+      throw new AssertionError("Worker task failed: " + ee.getCause(), ee.getCause());
+    } finally {
+      // release the pool threads even if the test was interrupted
+      executorService.shutdownNow();
+    }
+  }
+}

-- 
To stop receiving notification emails like this one, please contact
['"commits@opennlp.apache.org" <co...@opennlp.apache.org>'].