You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/12/20 10:27:05 UTC
[opennlp] branch master updated: OPENNLP-1168: Resolve concurrency
issue in POS tagger
This is an automated email from the ASF dual-hosted git repository.
joern pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new c2f1b68 OPENNLP-1168: Resolve concurrency issue in POS tagger
c2f1b68 is described below
commit c2f1b685abecfc11de76ffd0a28771f41b566782
Author: nisc4377 <ex...@esolutions.de>
AuthorDate: Thu Dec 14 11:30:22 2017 +0100
OPENNLP-1168: Resolve concurrency issue in POS tagger
---
.../tools/postag/DefaultPOSContextGenerator.java | 7 +-
.../postag/DefaultPOSContextGeneratorTest.java | 173 +++++++++++++++++++++
2 files changed, 176 insertions(+), 4 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
index 3035ca5..3f4fe97 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/DefaultPOSContextGenerator.java
@@ -43,7 +43,6 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
private Object wordsKey;
private Dictionary dict;
- private String[] dictGram;
/**
* Initializes the current instance.
@@ -62,7 +61,7 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
*/
public DefaultPOSContextGenerator(int cacheSize, Dictionary dict) {
this.dict = dict;
- dictGram = new String[1];
+
if (cacheSize > 0) {
contextsCache = new Cache<>(cacheSize);
}
@@ -148,8 +147,8 @@ public class DefaultPOSContextGenerator implements POSContextGenerator {
e.add("default");
// add the word itself
e.add("w=" + lex);
- dictGram[0] = lex;
- if (dict == null || !dict.contains(new StringList(dictGram))) {
+
+ if (dict == null || !dict.contains(new StringList(lex))) {
// do some basic suffix analysis
String[] suffs = getSuffixes(lex);
for (int i = 0; i < suffs.length; i++) {
diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java
new file mode 100644
index 0000000..450bb2c
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/DefaultPOSContextGeneratorTest.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.postag;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.util.StringList;
+
+/**
+ * Tests for DefaultPOSContextGenerator.
+ *
+ * We encountered a concurrency issue in the POS tagger module, in the class
+ * DefaultPOSContextGenerator.
+ *
+ * The issue is demonstrated in DefaultPOSContextGeneratorTest.java. The test "multithreading()"
+ * consistently failed on our system with the code as it was before the fix if the number of
+ * threads (NUMBER_OF_THREADS) was set to 10. If the number of threads was set to 1 (effectively
+ * disabling multithreading), the test consistently passed.
+ *
+ * We resolved the issue by removing a field in DefaultPOSContextGenerator.java.
+ */
+
+
+public class DefaultPOSContextGeneratorTest {
+
+
+ // Size of the thread pool used by multithreading().
+ public static final int NUMBER_OF_THREADS = 10;
+ // Sentence tokens handed to getContext(); assigned in setUp().
+ private static Object[] tokens;
+ // Context generator under test; created once in setUp() and shared by all tests.
+ private static DefaultPOSContextGenerator defaultPOSContextGenerator;
+ // Tags handed to getContext() alongside the tokens; assigned in setUp().
+ private static String[] tags;
+
+ /**
+  * Creates the fixtures shared by all tests: a four-token sentence with its tags and a
+  * context generator whose dictionary contains exactly one entry, "tokenC".
+  */
+ @BeforeClass
+ public static void setUp() {
+   final String dictionaryEntry = "tokenC";
+
+   // Dictionary with a single one-token entry; only index 2 of the sentence matches it.
+   final Dictionary singleEntryDictionary = new Dictionary();
+   singleEntryDictionary.put(new StringList(new String[] {dictionaryEntry}));
+   defaultPOSContextGenerator = new DefaultPOSContextGenerator(singleEntryDictionary);
+
+   tokens = new Object[] {"tokenA", "tokenB", dictionaryEntry, "tokenD"};
+   tags = new String[] {"tagA", "tagB", "tagC", "tagD"};
+ }
+
+ /**
+  * Requests the context for "tokenB" (index 1), which is not a dictionary entry, and
+  * expects the suffix ("suf=") and prefix ("pre=") features to be part of the result.
+  */
+ @Test
+ public void noDictionaryMatch() {
+   final int position = 1;
+
+   final String[] wanted = {
+       "default",
+       "w=tokenB",
+       "suf=B",
+       "suf=nB",
+       "suf=enB",
+       "suf=kenB",
+       "pre=t",
+       "pre=to",
+       "pre=tok",
+       "pre=toke",
+       "c",
+       "p=tokenA",
+       "t=tagA",
+       "pp=*SB*",
+       "n=tokenC",
+       "nn=tokenD"
+   };
+
+   final String[] produced = defaultPOSContextGenerator.getContext(position, tokens, tags);
+
+   final String failureMessage = "Calling with not matching index at: " + position
+       + "\nexpected \n" + Arrays.toString(wanted)
+       + " but actually was \n" + Arrays.toString(produced);
+
+   Assert.assertArrayEquals(failureMessage, wanted, produced);
+ }
+
+ /**
+  * Requests the context for "tokenC" (index 2), the only dictionary entry, and expects
+  * a context without any suffix ("suf=") or prefix ("pre=") features.
+  */
+ @Test
+ public void dictionaryMatch() {
+   final int position = 2;
+
+   final String[] wanted = {
+       "default",
+       "w=tokenC",
+       "p=tokenB",
+       "t=tagB",
+       "pp=tokenA",
+       "t2=tagA,tagB",
+       "n=tokenD",
+       "nn=*SE*"
+   };
+
+   final String[] produced = defaultPOSContextGenerator.getContext(position, tokens, tags);
+
+   final String failureMessage = "Calling with index matching dictionary entry at: "
+       + position + "\nexpected \n" + Arrays.toString(wanted)
+       + " but actually was \n" + Arrays.toString(produced);
+
+   Assert.assertArrayEquals(failureMessage, wanted, produced);
+ }
+
+ /**
+  * Runs dictionaryMatch() and noDictionaryMatch() alternately, 200000 calls in total, on a
+  * pool of NUMBER_OF_THREADS threads that all use the same context generator instance.
+  * The test fails if any of the concurrent calls fails; the failure of the worker is
+  * attached as the cause so that its stack trace is reported.
+  */
+ @Test
+ public void multithreading() {
+   final Callable<Void> matching = () -> {
+     dictionaryMatch();
+     return null;
+   };
+
+   final Callable<Void> notMatching = () -> {
+     noDictionaryMatch();
+     return null;
+   };
+
+   final List<Callable<Void>> callables = IntStream.range(0, 200000)
+       .mapToObj(index -> (index % 2 == 0) ? matching : notMatching)
+       .collect(Collectors.toList());
+
+   final ExecutorService executorService = Executors.newFixedThreadPool(NUMBER_OF_THREADS);
+
+   try {
+     // invokeAll only returns once every task has completed, normally or exceptionally.
+     final List<Future<Void>> futures = executorService.invokeAll(callables);
+
+     executorService.shutdown();
+     Assert.assertTrue("Executor did not terminate in time",
+         executorService.awaitTermination(30, TimeUnit.SECONDS));
+
+     for (Future<Void> future : futures) {
+       try {
+         future.get();
+       } catch (ExecutionException ee) {
+         // The cause is the AssertionError (or other failure) thrown inside the worker.
+         // Its message may be null, so report the throwable itself and keep it as cause.
+         throw new AssertionError("Concurrent call failed: " + ee.getCause(), ee.getCause());
+       }
+     }
+   } catch (InterruptedException e) {
+     // An InterruptedException usually has no cause, so never dereference getCause() here.
+     Thread.currentThread().interrupt();
+     throw new AssertionError("Test interrupted", e);
+   } finally {
+     // Make sure the worker threads are released even when the test fails or is interrupted.
+     executorService.shutdownNow();
+   }
+ }
+}
--
To stop receiving notification emails like this one, please contact
['"commits@opennlp.apache.org" <co...@opennlp.apache.org>'].