You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/10 22:01:56 UTC
svn commit: r1576085 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/namefind/
namefind/
Author: joern
Date: Mon Mar 10 21:01:55 2014
New Revision: 1576085
URL: http://svn.apache.org/r1576085
Log:
OPENNLP-580 Added a factory to construct the name finder
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Mon Mar 10 21:01:55 2014
@@ -34,7 +34,9 @@ import opennlp.tools.namefind.NameSample
import opennlp.tools.namefind.NameSampleTypeFilter;
import opennlp.tools.namefind.TokenNameFinderCrossValidator;
import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
+import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.eval.EvaluationMonitor;
import opennlp.tools.util.model.ModelUtil;
@@ -91,12 +93,20 @@ public final class TokenNameFinderCrossV
sequenceCodecImplName = BilouCodec.class.getName();
}
- SequenceCodec<String> sequenceCodec = TokenNameFinderModel.instantiateSequenceCodec(sequenceCodecImplName);
+ SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
+
+ TokenNameFinderFactory nameFinderFactory = null;
+ try {
+ nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
+ featureGeneratorBytes, resources, sequenceCodec);
+ } catch (InvalidFormatException e) {
+ throw new TerminateToolException(-1, e.getMessage(), e);
+ }
TokenNameFinderCrossValidator validator;
try {
validator = new TokenNameFinderCrossValidator(params.getLang(),
- params.getType(), mlParams, featureGeneratorBytes, resources, sequenceCodec,
+ params.getType(), mlParams, nameFinderFactory,
listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
validator.evaluate(sampleStream, params.getFolds());
} catch (IOException e) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Mon Mar 10 21:01:55 2014
@@ -32,7 +32,9 @@ import opennlp.tools.namefind.BilouCodec
import opennlp.tools.namefind.BioCodec;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleTypeFilter;
+import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.model.ArtifactSerializer;
@@ -182,13 +184,21 @@ public final class TokenNameFinderTraine
sequenceCodecImplName = BilouCodec.class.getName();
}
- SequenceCodec<String> sequenceCodec = TokenNameFinderModel.instantiateSequenceCodec(sequenceCodecImplName);
+ SequenceCodec<String> sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(sequenceCodecImplName);
+
+ TokenNameFinderFactory nameFinderFactory = null;
+ try {
+ nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(),
+ featureGeneratorBytes, resources, sequenceCodec);
+ } catch (InvalidFormatException e) {
+ throw new TerminateToolException(-1, e.getMessage(), e);
+ }
TokenNameFinderModel model;
try {
model = opennlp.tools.namefind.NameFinderME.train(
- params.getLang(), params.getType(), sampleStream,
- mlParams, featureGeneratorBytes, resources, sequenceCodec);
+ params.getLang(), params.getType(), sampleStream, mlParams,
+ nameFinderFactory);
}
catch (IOException e) {
throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParams.java Mon Mar 10 21:01:55 2014
@@ -22,7 +22,6 @@ import java.io.File;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.params.BasicTrainingParams;
-import opennlp.tools.namefind.BioCodec;
/**
* TrainingParameters for Name Finder.
@@ -50,4 +49,8 @@ interface TrainingParams extends BasicTr
@OptionalParameter(defaultValue = "opennlp.tools.namefind.BioCodec")
@ParameterDescription(valueName = "codec", description = "sequence codec used to code name spans")
String getSequenceCodec();
+
+ // Optional class name of the TokenNameFinderFactory sub-class to use; the
+ // name finder tools hand this value to TokenNameFinderFactory.create(...).
+ @OptionalParameter
+ @ParameterDescription(valueName = "factoryName", description = "A sub-class of TokenNameFinderFactory")
+ String getFactory();
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java Mon Mar 10 21:01:55 2014
@@ -78,7 +78,10 @@ public class NameFinderEventStream exten
* @param type null or overrides the type parameter in the provided samples
* @param length The length of the sentence.
* @return An array of start, continue, other outcomes based on the specified names and sentence length.
+ *
+ * @deprecated use the BioCodec implementation of the SequenceCodec instead!
*/
+ @Deprecated
public static String[] generateOutcomes(Span[] names, String type, int length) {
String[] outcomes = new String[length];
for (int i = 0; i < outcomes.length; i++) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameSampleSequenceStream.java Mon Mar 10 21:01:55 2014
@@ -25,6 +25,7 @@ import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.Sequence;
import opennlp.tools.ml.model.SequenceStream;
import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
public class NameSampleSequenceStream implements SequenceStream {
@@ -32,6 +33,7 @@ public class NameSampleSequenceStream im
private NameContextGenerator pcg;
private final boolean useOutcomes;
private ObjectStream<NameSample> psi;
+ private SequenceCodec<String> seqCodec;
public NameSampleSequenceStream(ObjectStream<NameSample> psi) throws IOException {
this(psi, new DefaultNameContextGenerator((AdaptiveFeatureGenerator) null), true);
@@ -54,9 +56,16 @@ public class NameSampleSequenceStream im
public NameSampleSequenceStream(ObjectStream<NameSample> psi, NameContextGenerator pcg, boolean useOutcomes)
throws IOException {
+ this(psi, pcg, useOutcomes, new BioCodec());
+ }
+
+ public NameSampleSequenceStream(ObjectStream<NameSample> psi, NameContextGenerator pcg, boolean useOutcomes,
+ SequenceCodec<String> seqCodec)
+ throws IOException {
this.psi = psi;
this.useOutcomes = useOutcomes;
this.pcg = pcg;
+ this.seqCodec = seqCodec;
}
@SuppressWarnings("unchecked")
@@ -64,7 +73,7 @@ public class NameSampleSequenceStream im
Sequence<NameSample> pss = sequence;
TokenNameFinder tagger = new NameFinderME(new TokenNameFinderModel("x-unspecified", model, Collections.<String, Object>emptyMap(), null));
String[] sentence = pss.getSource().getSentence();
- String[] tags = NameFinderEventStream.generateOutcomes(tagger.find(sentence), null, sentence.length);
+ String[] tags = seqCodec.encode(tagger.find(sentence), sentence.length);
Event[] events = new Event[sentence.length];
NameFinderEventStream.generateEvents(sentence,tags,pcg).toArray(events);
@@ -77,7 +86,7 @@ public class NameSampleSequenceStream im
NameSample sample = psi.read();
if (sample != null) {
String sentence[] = sample.getSentence();
- String tags[] = NameFinderEventStream.generateOutcomes(sample.getNames(), null, sentence.length);
+ String tags[] = seqCodec.encode(sample.getNames(), sentence.length);
Event[] events = new Event[sentence.length];
for (int i=0; i < sentence.length; i++) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderCrossValidator.java Mon Mar 10 21:01:55 2014
@@ -138,13 +138,13 @@ public class TokenNameFinderCrossValidat
private final String languageCode;
private final TrainingParameters params;
private final String type;
- private final byte[] featureGeneratorBytes;
- private final Map<String, Object> resources;
+ private byte[] featureGeneratorBytes;
+ private Map<String, Object> resources;
private TokenNameFinderEvaluationMonitor[] listeners;
-
private FMeasure fmeasure = new FMeasure();
private SequenceCodec<String> codec;
+ private TokenNameFinderFactory factory;
/**
* Name finder cross validator
@@ -184,6 +184,17 @@ public class TokenNameFinderCrossValidat
TokenNameFinderEvaluationMonitor... listeners) {
this(languageCode, type, trainParams, featureGeneratorBytes, resources, new BioCodec(), listeners);
}
+
+ /**
+ * Name finder cross validator that is configured through a
+ * {@link TokenNameFinderFactory}.
+ *
+ * @param languageCode the language code used for the training runs
+ * @param type the name type used for the training runs
+ * @param trainParams the training parameters
+ * @param factory the factory handed to the name finder training; when it is
+ * not null the evaluation trains through it
+ * @param listeners the evaluation monitors to keep
+ */
+ public TokenNameFinderCrossValidator(String languageCode, String type,
+ TrainingParameters trainParams, TokenNameFinderFactory factory,
+ TokenNameFinderEvaluationMonitor... listeners) {
+ this.factory = factory;
+ this.listeners = listeners;
+ this.params = trainParams;
+ this.type = type;
+ this.languageCode = languageCode;
+ }
+
/**
* Starts the evaluation.
*
@@ -206,8 +217,15 @@ public class TokenNameFinderCrossValidat
CrossValidationPartitioner.TrainingSampleStream<DocumentSample> trainingSampleStream = partitioner
.next();
- TokenNameFinderModel model = opennlp.tools.namefind.NameFinderME.train(languageCode, type,
+ TokenNameFinderModel model;
+ if (factory != null) {
+ // Train on the training partition of the current fold only, exactly like
+ // the legacy branch below. Training on the complete sample stream would
+ // let the model see the data it is evaluated on afterwards.
+ model = opennlp.tools.namefind.NameFinderME.train(languageCode, type,
+ new DocumentToNameSampleStream(trainingSampleStream), params, factory);
+ }
+ else {
+ model = opennlp.tools.namefind.NameFinderME.train(languageCode, type,
new DocumentToNameSampleStream(trainingSampleStream), params, featureGeneratorBytes, resources, codec);
+
+ }
// do testing
TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java?rev=1576085&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java Mon Mar 10 21:01:55 2014
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Properties;
+
+import opennlp.tools.chunker.ChunkerContextGenerator;
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.namefind.TokenNameFinderModel.FeatureGeneratorCreationError;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.postag.TagDictionary;
+import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.ext.ExtensionLoader;
+import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
+import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
+import opennlp.tools.util.featuregen.AggregatedFeatureGenerator;
+import opennlp.tools.util.featuregen.FeatureGeneratorResourceProvider;
+import opennlp.tools.util.featuregen.GeneratorFactory;
+
+/**
+ * Factory for the resources and implementations used by the name finder.
+ * <p>
+ * The idea of this factory is that most resources/impls used by the name finder
+ * can be modified through this class. That only works if it is the central
+ * class used for training and at runtime.
+ */
+public class TokenNameFinderFactory extends BaseToolFactory {
+
+  private byte[] featureGeneratorBytes;
+  private Map<String, Object> resources;
+  private SequenceCodec<String> seqCodec;
+
+  /**
+   * Creates a {@link TokenNameFinderFactory} that provides the default implementation
+   * of the resources.
+   */
+  public TokenNameFinderFactory() {
+  }
+
+  /**
+   * Creates a factory that is initialized with the given values.
+   *
+   * @param featureGeneratorBytes the feature generator descriptor, or null
+   * @param resources the map in which resources are looked up by key when the
+   *     feature generators are created without an artifact provider, or null
+   * @param seqCodec the codec returned by {@link #createSequenceCodec()} when no
+   *     artifact provider is set
+   */
+  public TokenNameFinderFactory(byte[] featureGeneratorBytes, final Map<String, Object> resources,
+      SequenceCodec<String> seqCodec) {
+    init(featureGeneratorBytes, resources, seqCodec);
+  }
+
+  /**
+   * Stores the given values in this factory. Used by the constructor and by
+   * {@link #create(String, byte[], Map, SequenceCodec)}.
+   *
+   * @param featureGeneratorBytes the feature generator descriptor, or null
+   * @param resources the resources map, or null
+   * @param seqCodec the sequence codec
+   */
+  void init(byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) {
+    this.featureGeneratorBytes = featureGeneratorBytes;
+    this.resources = resources;
+    this.seqCodec = seqCodec;
+  }
+
+  /**
+   * Returns the sequence codec this factory was initialized with.
+   *
+   * @return the stored codec, null if none was provided
+   */
+  protected SequenceCodec<String> getSequenceCodec() {
+    return seqCodec;
+  }
+
+  /**
+   * Returns the resources this factory was initialized with.
+   *
+   * @return the stored resources map, null if none was provided
+   */
+  protected Map<String, Object> getResources() {
+    return resources;
+  }
+
+  /**
+   * Creates a factory and initializes it with the given values.
+   *
+   * @param subclassName the name of the {@link TokenNameFinderFactory} sub-class to
+   *     instantiate, or null to use {@link TokenNameFinderFactory} itself
+   * @param featureGeneratorBytes the feature generator descriptor, or null
+   * @param resources the resources map, or null
+   * @param seqCodec the sequence codec
+   * @return the initialized factory
+   * @throws InvalidFormatException if the sub-class cannot be instantiated or initialized
+   */
+  public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes,
+      final Map<String, Object> resources, SequenceCodec<String> seqCodec)
+      throws InvalidFormatException {
+    if (subclassName == null) {
+      // No sub-class requested: use the default factory, but still hand the
+      // provided values over to it. A bare "new TokenNameFinderFactory()" would
+      // silently drop the descriptor, the resources and the codec.
+      return new TokenNameFinderFactory(featureGeneratorBytes, resources, seqCodec);
+    }
+    try {
+      TokenNameFinderFactory theFactory = ExtensionLoader.instantiateExtension(
+          TokenNameFinderFactory.class, subclassName);
+      theFactory.init(featureGeneratorBytes, resources, seqCodec);
+      return theFactory;
+    } catch (Exception e) {
+      String msg = "Could not instantiate the " + subclassName
+          + ". The initialization throw an exception.";
+      System.err.println(msg);
+      e.printStackTrace();
+      throw new InvalidFormatException(msg, e);
+    }
+  }
+
+  @Override
+  public void validateArtifactMap() throws InvalidFormatException {
+    // no additional artifacts
+  }
+
+  /**
+   * Creates the sequence codec. When an artifact provider is set, the
+   * implementation named by the manifest property
+   * {@code TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER} is
+   * instantiated; otherwise the codec this factory was initialized with is returned.
+   *
+   * @return the sequence codec
+   */
+  public SequenceCodec<String> createSequenceCodec() {
+
+    if (artifactProvider != null) {
+      String sequenceCodecImplName = artifactProvider.getManifestProperty(
+          TokenNameFinderModel.SEQUENCE_CODEC_CLASS_NAME_PARAMETER);
+      return instantiateSequenceCodec(sequenceCodecImplName);
+    }
+    else {
+      return seqCodec;
+    }
+  }
+
+  /**
+   * Creates the context generator on top of the feature generators returned by
+   * {@link #createFeatureGenerators()}; when that method returns null the
+   * generator from {@code NameFinderME.createFeatureGenerator()} is used instead.
+   *
+   * @return a new {@link DefaultNameContextGenerator}
+   */
+  public NameContextGenerator createContextGenerator() {
+
+    AdaptiveFeatureGenerator featureGenerator = createFeatureGenerators();
+
+    if (featureGenerator == null) {
+      featureGenerator = NameFinderME.createFeatureGenerator();
+    }
+
+    return new DefaultNameContextGenerator(featureGenerator);
+  }
+
+  /**
+   * Creates the {@link AdaptiveFeatureGenerator}. Usually this
+   * is a set of generators contained in the {@link AggregatedFeatureGenerator}.
+   *
+   * Note:
+   * The generators are created on every call to this method.
+   *
+   * @return the feature generator or null if there is no descriptor
+   */
+  // TODO: During training time the resources need to be loaded from the resources map!
+  public AdaptiveFeatureGenerator createFeatureGenerators() {
+
+    // The descriptor this factory was initialized with wins; the descriptor
+    // artifact is only consulted when none was provided.
+    byte[] descriptorBytes;
+    if (featureGeneratorBytes == null && artifactProvider != null) {
+      descriptorBytes = (byte[]) artifactProvider.getArtifact(
+          TokenNameFinderModel.GENERATOR_DESCRIPTOR_ENTRY_NAME);
+    }
+    else {
+      descriptorBytes = featureGeneratorBytes;
+    }
+
+    if (descriptorBytes != null) {
+      InputStream descriptorIn = new ByteArrayInputStream(descriptorBytes);
+
+      AdaptiveFeatureGenerator generator = null;
+      try {
+        generator = GeneratorFactory.create(descriptorIn, new FeatureGeneratorResourceProvider() {
+
+          public Object getResource(String key) {
+            if (artifactProvider != null) {
+              return artifactProvider.getArtifact(key);
+            }
+            else {
+              // The factory may have been initialized without a resources map;
+              // report the resource as missing instead of failing with a
+              // NullPointerException.
+              return resources != null ? resources.get(key) : null;
+            }
+          }
+        });
+      } catch (InvalidFormatException e) {
+        // It is assumed that the creation of the feature generation does not
+        // fail after it succeeded once during model loading.
+
+        // But it might still be possible that such an exception is thrown,
+        // in this case the caller should not be forced to handle the exception
+        // and a Runtime Exception is thrown instead.
+
+        // If the re-creation of the feature generation fails it is assumed
+        // that this can only be caused by a programming mistake and therefore
+        // throwing a Runtime Exception is reasonable
+
+        throw new FeatureGeneratorCreationError(e);
+      } catch (IOException e) {
+        throw new IllegalStateException("Reading from mem cannot result in an I/O error", e);
+      }
+
+      return generator;
+    }
+    else {
+      return null;
+    }
+  }
+
+  /**
+   * Instantiates the sequence codec with the given implementation name.
+   *
+   * @param sequenceCodecImplName the class name of the codec, or null
+   * @return the instantiated codec, or a {@link BioCodec} if the name is null
+   */
+  public static SequenceCodec<String> instantiateSequenceCodec(
+      String sequenceCodecImplName) {
+
+    if (sequenceCodecImplName != null) {
+      return ExtensionLoader.instantiateExtension(
+          SequenceCodec.class, sequenceCodecImplName);
+    }
+    else {
+      // If nothing is specified return old default!
+      return new BioCodec();
+    }
+  }
+}
+
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1576085&r1=1576084&r2=1576085&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Mon Mar 10 21:01:55 2014
@@ -28,9 +28,11 @@ import java.util.Collections;
import java.util.Map;
import java.util.Properties;
+import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.ml.BeamSearch;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
+import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.ext.ExtensionLoader;
@@ -74,9 +76,9 @@ public class TokenNameFinderModel extend
private static final String COMPONENT_NAME = "NameFinderME";
private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model";
- private static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
+ static final String GENERATOR_DESCRIPTOR_ENTRY_NAME = "generator.featuregen";
- private static final String SEQUENCE_CODEC_CLASS_NAME_PARAMETER = "sequenceCodecImplName";
+ static final String SEQUENCE_CODEC_CLASS_NAME_PARAMETER = "sequenceCodecImplName";
public TokenNameFinderModel(String languageCode, SequenceClassificationModel<String> nameFinderModel,
byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries,
@@ -193,16 +195,18 @@ public class TokenNameFinderModel extend
}
}
- public SequenceCodec<String> createSequenceCodec() {
-
- // TODO: Lookup impl name with
- // SEQUENCE_CODEC_CLASS_NAME_PARAMETER
- Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
-
- String sequeceCodecImplName = manifest.getProperty(SEQUENCE_CODEC_CLASS_NAME_PARAMETER);
- return instantiateSequenceCodec(sequeceCodecImplName);
+ @Override
+ protected Class<? extends BaseToolFactory> getDefaultFactory() {
+ return TokenNameFinderFactory.class;
}
+ /**
+ * Returns the tool factory of this model as a {@link TokenNameFinderFactory}.
+ *
+ * @return the tool factory held by this model
+ */
+ public TokenNameFinderFactory getFactory() {
+ return TokenNameFinderFactory.class.cast(this.toolFactory);
+ }
+
+ // TODO: This should be moved to the NameFinderFactory ... !!!
+ // Lets deprecate it!
+
/**
* Creates the {@link AdaptiveFeatureGenerator}. Usually this
* is a set of generators contained in the {@link AggregatedFeatureGenerator}.
@@ -211,44 +215,11 @@ public class TokenNameFinderModel extend
* The generators are created on every call to this method.
*
* @return the feature generator or null if there is no descriptor in the model
+ * @deprecated use TokenNameFinderFactory.createFeatureGenerators instead!
*/
+ @Deprecated
public AdaptiveFeatureGenerator createFeatureGenerators() {
-
- byte descriptorBytes[] = (byte[]) artifactMap.get(GENERATOR_DESCRIPTOR_ENTRY_NAME);
-
- if (descriptorBytes != null) {
- InputStream descriptorIn = new ByteArrayInputStream(descriptorBytes);
-
- AdaptiveFeatureGenerator generator = null;
- try {
- generator = GeneratorFactory.create(descriptorIn, new FeatureGeneratorResourceProvider() {
-
- public Object getResource(String key) {
- return artifactMap.get(key);
- }
- });
- } catch (InvalidFormatException e) {
- // It is assumed that the creation of the feature generation does not
- // fail after it succeeded once during model loading.
-
- // But it might still be possible that such an exception is thrown,
- // in this case the caller should not be forced to handle the exception
- // and a Runtime Exception is thrown instead.
-
- // If the re-creation of the feature generation fails it is assumed
- // that this can only be caused by a programming mistake and therefore
- // throwing a Runtime Exception is reasonable
-
- throw new FeatureGeneratorCreationError(e);
- } catch (IOException e) {
- throw new IllegalStateException("Reading from mem cannot result in an I/O error", e);
- }
-
- return generator;
- }
- else {
- return null;
- }
+ return getFactory().createFeatureGenerators();
}
public TokenNameFinderModel updateFeatureGenerator(byte descriptor[]) {
@@ -257,12 +228,13 @@ public class TokenNameFinderModel extend
if (getNameFinderModel() != null) {
model = new TokenNameFinderModel(getLanguage(), getNameFinderModel(), 1,
- descriptor, Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(), createSequenceCodec());
+ descriptor, Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(),
+ getFactory().createSequenceCodec());
}
else {
model = new TokenNameFinderModel(getLanguage(), getNameFinderSequenceModel(),
descriptor, Collections.<String, Object>emptyMap(), Collections.<String, String>emptyMap(),
- createSequenceCodec());
+ getFactory().createSequenceCodec());
}
model.artifactMap.clear();
@@ -296,7 +268,7 @@ public class TokenNameFinderModel extend
return serializers;
}
- public boolean isModelValid(MaxentModel model) {
+ boolean isModelValid(MaxentModel model) {
String outcomes[] = new String[model.getNumOutcomes()];
@@ -304,7 +276,7 @@ public class TokenNameFinderModel extend
outcomes[i] = model.getOutcome(i);
}
- return createSequenceCodec().areOutcomesCompatible(outcomes);
+ return getFactory().createSequenceCodec().areOutcomesCompatible(outcomes);
}
@Override
@@ -321,17 +293,4 @@ public class TokenNameFinderModel extend
throw new InvalidFormatException("Token Name Finder model is incomplete!");
}
}
-
- public static SequenceCodec<String> instantiateSequenceCodec(
- String sequenceCodecImplName) {
-
- if (sequenceCodecImplName != null) {
- return ExtensionLoader.instantiateExtension(
- SequenceCodec.class, sequenceCodecImplName);
- }
- else {
- // If nothing is specified return old default!
- return new BioCodec();
- }
- }
}