You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ko...@apache.org on 2018/01/12 02:50:21 UTC

[opennlp] branch master updated: OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)

This is an automated email from the ASF dual-hosted git repository.

koji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new c9685cd  OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
c9685cd is described below

commit c9685cd5e462926aa401fca7321ea23927492006
Author: Koji Sekiguchi <ko...@rondhuit.com>
AuthorDate: Fri Jan 12 11:50:19 2018 +0900

    OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
---
 .../tools/util/featuregen/GeneratorFactory.java    | 16 ++++++-
 .../tools/namefind/ner-default-features.xml        | 38 ++++++++--------
 .../opennlp/tools/postag/pos-default-features.xml  | 38 ++++++++--------
 .../util/featuregen/GeneratorFactoryTest.java      | 27 ++++++++++-
 .../opennlp/tools/eval/ner-en_pos-features.xml     | 39 +++++++++-------
 .../opennlp/tools/namefind/ner-pos-features.xml    | 52 ++++++++++------------
 ...callyInsertAggregatedFeatureGeneratorCache.xml} | 10 ++---
 ...ld.xml => TestInsertCachedFeatureGenerator.xml} |  8 ++--
 ...callyInsertAggregatedFeatureGeneratorCache.xml} |  6 +--
 9 files changed, 131 insertions(+), 103 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index 867c1e0..bf55abf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -469,13 +469,25 @@ public class GeneratorFactory {
         }
       }
 
+      AdaptiveFeatureGenerator featureGenerator = null;
       if (generators.size() == 1)
-        return generators.get(0);
+        featureGenerator = generators.get(0);
       else if (generators.size() > 1)
-        return new AggregatedFeatureGenerator(generators.toArray(
+        featureGenerator = new AggregatedFeatureGenerator(generators.toArray(
             new AdaptiveFeatureGenerator[generators.size()]));
       else
         throw new InvalidFormatException("featureGenerators must have one or more generators");
+
+      // disallow manually specifying CachedFeatureGenerator; caching is requested via the cache attribute
+      if (featureGenerator instanceof CachedFeatureGenerator)
+        throw new InvalidFormatException("CachedFeatureGeneratorFactory cannot be specified manually." +
+          " Use cache=\"true\" attribute in featureGenerators element instead.");
+
+      // check cache usage
+      if (Boolean.parseBoolean(generatorElement.getAttribute("cache")))
+        return new CachedFeatureGenerator(featureGenerator);
+      else
+        return featureGenerator;
     }
     else {
       // support classic format
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
index 32887cf..1f60ad1 100644
--- a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
+++ b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
@@ -18,24 +18,22 @@
 -->
 
 <!-- Default name finder feature generator configuration -->
-<featureGenerators name="nameFinder">
-    <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
-          <bool name="begin">true</bool>
-          <bool name="end">false</bool>
-        </generator>
-    </generator>
+<featureGenerators cache="true" name="nameFinder">
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+    <bool name="begin">true</bool>
+    <bool name="end">false</bool>
+  </generator>
 </featureGenerators>
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
index c1be8ee..2137511 100644
--- a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
+++ b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -18,25 +18,23 @@
 -->
 
 <!-- Default pos tagger feature generator configuration -->
-<featureGenerators name="posTagger">
-    <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-        <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
-            <bool name="begin">true</bool>
-            <bool name="end">false</bool>
-          </generator>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
+<featureGenerators cache="true" name="posTagger">
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+      <bool name="begin">true</bool>
+      <bool name="end">false</bool>
     </generator>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
 </featureGenerators>
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 35a58bb..4e95b20 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -208,7 +208,7 @@ public class GeneratorFactoryTest {
   @Test
   public void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild() throws Exception {
     InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-        "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml");
+        "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
 
     // If this fails the generator descriptor could not be found
     // at the expected location
@@ -225,7 +225,7 @@ public class GeneratorFactoryTest {
   @Test
   public void testAutomaticallyInsertAggregatedFeatureGeneratorChildren() throws Exception {
     InputStream generatorDescriptorIn = getClass().getResourceAsStream(
-        "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml");
+        "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
 
     // If this fails the generator descriptor could not be found
     // at the expected location
@@ -244,4 +244,27 @@ public class GeneratorFactoryTest {
       Assert.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
     }
   }
+
+  @Test
+  public void testInsertCachedFeatureGenerator() throws Exception {
+    InputStream generatorDescriptorIn = getClass().getResourceAsStream(
+        "/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml");
+
+    // If this fails the generator descriptor could not be found
+    // at the expected location
+    Assert.assertNotNull(generatorDescriptorIn);
+
+    AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
+    Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
+    CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator)featureGenerator;
+
+    Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
+        instanceof AggregatedFeatureGenerator);
+    AggregatedFeatureGenerator aggregatedFeatureGenerator =
+        (AggregatedFeatureGenerator)cachedFeatureGenerator.getCachedFeatureGenerator();
+    Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
+    for (AdaptiveFeatureGenerator afg: aggregatedFeatureGenerator.getGenerators()) {
+      Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
+    }
+  }
 }
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
index b850904..06c73df 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
@@ -18,20 +18,25 @@
 -->
 
 <!-- Default name finder feature generator configuration -->
-<generators>
-  <cache>
-    <generators>
-      <window prevLength = "2" nextLength = "2">
-        <tokenclass/>
-      </window>
-      <window prevLength = "2" nextLength = "2">
-        <token/>
-      </window>
-      <definition/>
-      <prevmap/>
-      <bigram/>
-      <sentence begin="true" end="false"/>
-      <tokenpos model="en-pos-perceptron.bin"/>
-    </generators>
-  </cache>
-</generators>
\ No newline at end of file
+<featureGenerators cache="true" name="nameFinder">
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+    <bool name="begin">true</bool>
+    <bool name="end">false</bool>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+    <str name="model">en-pos-perceptron.bin</str>
+  </generator>
+</featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
index 7464627..c8b5887 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -15,33 +15,29 @@
   ~ limitations under the License.
   -->
 
-<featureGenerators name="nameFinder">
-    <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-      <generator class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
-          <int name="prevLength">2</int>
-          <int name="nextLength">2</int>
-          <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
-            <str name="model">pos-model.bin</str>
-          </generator>
-        </generator>
-        <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
-        <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
-          <bool name="begin">true</bool>
-          <bool name="end">false</bool>
-        </generator>
-      </generator>
+<featureGenerators cache="true" name="nameFinder">
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+    <int name="prevLength">2</int>
+    <int name="nextLength">2</int>
+    <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+      <str name="model">pos-model.bin</str>
     </generator>
+  </generator>
+  <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+    <bool name="begin">true</bool>
+    <bool name="end">false</bool>
+  </generator>
 </featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 67%
rename from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
rename to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index 7dbed59..08f1400 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,10 +19,8 @@
 	under the License.
 -->
 
-<featureGenerators name="test">
-  <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-    <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-    <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-    <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-  </generator>
+<featureGenerators cache="true" name="test">
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
 </featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
similarity index 73%
copy from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
copy to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
index ed7f2f6..08f1400 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
@@ -19,8 +19,8 @@
 	under the License.
 -->
 
-<featureGenerators name="test">
-  <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-    <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-  </generator>
+<featureGenerators cache="true" name="test">
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
 </featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 79%
rename from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
rename to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index ed7f2f6..801adad 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,8 +19,6 @@
 	under the License.
 -->
 
-<featureGenerators name="test">
-  <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
-    <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
-  </generator>
+<featureGenerators cache="true" name="test">
+  <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
 </featureGenerators>

-- 
To stop receiving notification emails like this one, please contact
['"commits@opennlp.apache.org" <co...@opennlp.apache.org>'].