You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ko...@apache.org on 2018/01/12 02:50:21 UTC
[opennlp] branch master updated: OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
This is an automated email from the ASF dual-hosted git repository.
koji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new c9685cd OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
c9685cd is described below
commit c9685cd5e462926aa401fca7321ea23927492006
Author: Koji Sekiguchi <ko...@rondhuit.com>
AuthorDate: Fri Jan 12 11:50:19 2018 +0900
OPENNLP-1160: avoid letting users specify CachedFeatureGeneratorFactory in XML config (#305)
---
.../tools/util/featuregen/GeneratorFactory.java | 16 ++++++-
.../tools/namefind/ner-default-features.xml | 38 ++++++++--------
.../opennlp/tools/postag/pos-default-features.xml | 38 ++++++++--------
.../util/featuregen/GeneratorFactoryTest.java | 27 ++++++++++-
.../opennlp/tools/eval/ner-en_pos-features.xml | 39 +++++++++-------
.../opennlp/tools/namefind/ner-pos-features.xml | 52 ++++++++++------------
...callyInsertAggregatedFeatureGeneratorCache.xml} | 10 ++---
...ld.xml => TestInsertCachedFeatureGenerator.xml} | 8 ++--
...callyInsertAggregatedFeatureGeneratorCache.xml} | 6 +--
9 files changed, 131 insertions(+), 103 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index 867c1e0..bf55abf 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -469,13 +469,25 @@ public class GeneratorFactory {
}
}
+ AdaptiveFeatureGenerator featureGenerator = null;
if (generators.size() == 1)
- return generators.get(0);
+ featureGenerator = generators.get(0);
else if (generators.size() > 1)
- return new AggregatedFeatureGenerator(generators.toArray(
+ featureGenerator = new AggregatedFeatureGenerator(generators.toArray(
new AdaptiveFeatureGenerator[generators.size()]));
else
throw new InvalidFormatException("featureGenerators must have one or more generators");
+
+ // disallow manually specifying CachedFeatureGenerator
+ if (featureGenerator instanceof CachedFeatureGenerator)
+ throw new InvalidFormatException("CachedFeatureGeneratorFactory cannot be specified manually." +
+ "Use cache=\"true\" attribute in featureGenerators element instead.");
+
+ // check cache usage
+ if (Boolean.parseBoolean(generatorElement.getAttribute("cache")))
+ return new CachedFeatureGenerator(featureGenerator);
+ else
+ return featureGenerator;
}
else {
// support classic format
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
index 32887cf..1f60ad1 100644
--- a/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
+++ b/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
@@ -18,24 +18,22 @@
-->
<!-- Default name finder feature generator configuration -->
-<featureGenerators name="nameFinder">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
index c1be8ee..2137511 100644
--- a/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
+++ b/opennlp-tools/src/main/resources/opennlp/tools/postag/pos-default-features.xml
@@ -18,25 +18,23 @@
-->
<!-- Default pos tagger feature generator configuration -->
-<featureGenerators name="posTagger">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
- <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
+<featureGenerators cache="true" name="posTagger">
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.SuffixFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.PrefixFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
</generator>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.PosTaggerFeatureGeneratorFactory"/>
</featureGenerators>
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
index 35a58bb..4e95b20 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
@@ -208,7 +208,7 @@ public class GeneratorFactoryTest {
@Test
public void testNotAutomaticallyInsertAggregatedFeatureGeneratorChild() throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
- "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml");
+ "/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -225,7 +225,7 @@ public class GeneratorFactoryTest {
@Test
public void testAutomaticallyInsertAggregatedFeatureGeneratorChildren() throws Exception {
InputStream generatorDescriptorIn = getClass().getResourceAsStream(
- "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml");
+ "/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml");
// If this fails the generator descriptor could not be found
// at the expected location
@@ -244,4 +244,27 @@ public class GeneratorFactoryTest {
Assert.assertTrue(afgen instanceof OutcomePriorFeatureGenerator);
}
}
+
+ @Test
+ public void testInsertCachedFeatureGenerator() throws Exception {
+ InputStream generatorDescriptorIn = getClass().getResourceAsStream(
+ "/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml");
+
+ // If this fails the generator descriptor could not be found
+ // at the expected location
+ Assert.assertNotNull(generatorDescriptorIn);
+
+ AdaptiveFeatureGenerator featureGenerator = GeneratorFactory.create(generatorDescriptorIn, null);
+ Assert.assertTrue(featureGenerator instanceof CachedFeatureGenerator);
+ CachedFeatureGenerator cachedFeatureGenerator = (CachedFeatureGenerator)featureGenerator;
+
+ Assert.assertTrue(cachedFeatureGenerator.getCachedFeatureGenerator()
+ instanceof AggregatedFeatureGenerator);
+ AggregatedFeatureGenerator aggregatedFeatureGenerator =
+ (AggregatedFeatureGenerator)cachedFeatureGenerator.getCachedFeatureGenerator();
+ Assert.assertEquals(3, aggregatedFeatureGenerator.getGenerators().size());
+ for (AdaptiveFeatureGenerator afg: aggregatedFeatureGenerator.getGenerators()) {
+ Assert.assertTrue(afg instanceof OutcomePriorFeatureGenerator);
+ }
+ }
}
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
index b850904..06c73df 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/eval/ner-en_pos-features.xml
@@ -18,20 +18,25 @@
-->
<!-- Default name finder feature generator configuration -->
-<generators>
- <cache>
- <generators>
- <window prevLength = "2" nextLength = "2">
- <tokenclass/>
- </window>
- <window prevLength = "2" nextLength = "2">
- <token/>
- </window>
- <definition/>
- <prevmap/>
- <bigram/>
- <sentence begin="true" end="false"/>
- <tokenpos model="en-pos-perceptron.bin"/>
- </generators>
- </cache>
-</generators>
\ No newline at end of file
+<featureGenerators cache="true" name="nameFinder">
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">en-pos-perceptron.bin</str>
+ </generator>
+</featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
index 7464627..c8b5887 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -15,33 +15,29 @@
~ limitations under the License.
-->
-<featureGenerators name="nameFinder">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.AggregatedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
- </generator>
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
- </generator>
- <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
- <int name="prevLength">2</int>
- <int name="nextLength">2</int>
- <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
- <str name="model">pos-model.bin</str>
- </generator>
- </generator>
- <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
- <bool name="begin">true</bool>
- <bool name="end">false</bool>
- </generator>
- </generator>
+<featureGenerators cache="true" name="nameFinder">
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenClassFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.TokenFeatureGeneratorFactory"/>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.WindowFeatureGeneratorFactory">
+ <int name="prevLength">2</int>
+ <int name="nextLength">2</int>
+ <generator class="opennlp.tools.util.featuregen.POSTaggerNameFeatureGeneratorFactory">
+ <str name="model">pos-model.bin</str>
</generator>
+ </generator>
+ <generator class="opennlp.tools.util.featuregen.PreviousMapFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.BigramNameFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.SentenceFeatureGeneratorFactory">
+ <bool name="begin">true</bool>
+ <bool name="end">false</bool>
+ </generator>
</featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 67%
rename from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
rename to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index 7dbed59..08f1400 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorChildren.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,10 +19,8 @@
under the License.
-->
-<featureGenerators name="test">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
similarity index 73%
copy from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
copy to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
index ed7f2f6..08f1400 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestInsertCachedFeatureGenerator.xml
@@ -19,8 +19,8 @@
under the License.
-->
-<featureGenerators name="test">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
similarity index 79%
rename from opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
rename to opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
index ed7f2f6..801adad 100644
--- a/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorChild.xml
+++ b/opennlp-tools/src/test/resources/opennlp/tools/util/featuregen/TestNotAutomaticallyInsertAggregatedFeatureGeneratorCache.xml
@@ -19,8 +19,6 @@
under the License.
-->
-<featureGenerators name="test">
- <generator class="opennlp.tools.util.featuregen.CachedFeatureGeneratorFactory">
- <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
- </generator>
+<featureGenerators cache="true" name="test">
+ <generator class="opennlp.tools.util.featuregen.DefinitionFeatureGeneratorFactory"/>
</featureGenerators>
--
To stop receiving notification emails like this one, please contact
commits@opennlp.apache.org.