You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/10/02 16:45:15 UTC
svn commit: r1392937 - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/chunker/ main/java/opennlp/tools/cmdline/chunker/
test/java/opennlp/tools/chunker/
Author: colen
Date: Tue Oct 2 14:45:14 2012
New Revision: 1392937
URL: http://svn.apache.org/viewvc?rev=1392937&view=rev
Log:
OPENNLP-539: Implemented customization factory for the Chunker
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java (with props)
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java (with props)
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java (with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java Tue Oct 2 14:45:14 2012
@@ -33,10 +33,12 @@ public class ChunkerCrossValidator {
private FMeasure fmeasure = new FMeasure();
private ChunkerEvaluationMonitor[] listeners;
+ private ChunkerFactory chunkerFactory;
/**
- * @deprecated use {@link ChunkerCrossValidator#ChunkerCrossValidator(String, TrainingParameters, ChunkerEvaluationMonitor...)}
- * instead and pass in a TrainingParameters object.
+ * @deprecated Use
+ * {@link #ChunkerCrossValidator(String, TrainingParameters, ChunkerFactory, ChunkerEvaluationMonitor...)}
+ * instead.
*/
@Deprecated
public ChunkerCrossValidator(String languageCode, int cutoff, int iterations) {
@@ -47,6 +49,9 @@ public class ChunkerCrossValidator {
listeners = null;
}
+ /**
+ * @deprecated Use {@link #ChunkerCrossValidator(String, TrainingParameters, ChunkerFactory, ChunkerEvaluationMonitor...)} instead.
+ */
public ChunkerCrossValidator(String languageCode, TrainingParameters params,
ChunkerEvaluationMonitor... listeners) {
@@ -54,6 +59,14 @@ public class ChunkerCrossValidator {
this.params = params;
this.listeners = listeners;
}
+
+ public ChunkerCrossValidator(String languageCode, TrainingParameters params,
+ ChunkerFactory factory, ChunkerEvaluationMonitor... listeners) {
+ this.chunkerFactory = factory;
+ this.languageCode = languageCode;
+ this.params = params;
+ this.listeners = listeners;
+ }
/**
* Starts the evaluation.
@@ -76,12 +89,11 @@ public class ChunkerCrossValidator {
.next();
ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
- new DefaultChunkerContextGenerator(), params);
+ params, chunkerFactory);
// do testing
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model,
- ChunkerME.DEFAULT_BEAM_SIZE, new DefaultChunkerSequenceValidator()),
- listeners);
+ ChunkerME.DEFAULT_BEAM_SIZE), listeners);
evaluator.evaluate(trainingSampleStream.getTestSampleStream());
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java Tue Oct 2 14:45:14 2012
@@ -48,6 +48,8 @@ public class ChunkerEventStream extends
/**
* Creates a new event stream based on the specified data stream.
* @param d The data stream for this event stream.
+ *
+ * @deprecated Use {@link #ChunkerEventStream(ObjectStream, ChunkerContextGenerator)} instead.
*/
public ChunkerEventStream(ObjectStream<ChunkSample> d) {
this(d, new DefaultChunkerContextGenerator());
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java Tue Oct 2 14:45:14 2012
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.ext.ExtensionLoader;
+
+public class ChunkerFactory extends BaseToolFactory {
+
+ /**
+ * Creates a {@link ChunkerFactory} that provides the default implementation
+ * of the resources.
+ */
+ public ChunkerFactory() {
+ }
+
+ public static ChunkerFactory create(String subclassName)
+ throws InvalidFormatException {
+ if (subclassName == null) {
+ // will create the default factory
+ return new ChunkerFactory();
+ }
+ try {
+ ChunkerFactory theFactory = ExtensionLoader.instantiateExtension(
+ ChunkerFactory.class, subclassName);
+ return theFactory;
+ } catch (Exception e) {
+ String msg = "Could not instantiate the " + subclassName
+ + ". The initialization throw an exception.";
+ System.err.println(msg);
+ e.printStackTrace();
+ throw new InvalidFormatException(msg, e);
+ }
+ }
+
+ @Override
+ public void validateArtifactMap() throws InvalidFormatException {
+ // no additional artifacts
+ }
+
+ public SequenceValidator<String> getSequenceValidator() {
+ return new DefaultChunkerSequenceValidator();
+ }
+
+ public ChunkerContextGenerator getContextGenerator() {
+ return new DefaultChunkerContextGenerator();
+ }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java Tue Oct 2 14:45:14 2012
@@ -64,6 +64,8 @@ public class ChunkerME implements Chunke
* @param sequenceValidator The {@link SequenceValidator} to determines whether the outcome
* is valid for the preceding sequence. This can be used to implement constraints
* on what sequences are valid.
+ * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead
+ * and use the {@link ChunkerFactory} to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}.
*/
public ChunkerME(ChunkerModel model, int beamSize, SequenceValidator<String> sequenceValidator,
ChunkerContextGenerator contextGenerator) {
@@ -80,6 +82,8 @@ public class ChunkerME implements Chunke
* @param sequenceValidator The {@link SequenceValidator} to determines whether the outcome
* is valid for the preceding sequence. This can be used to implement constraints
* on what sequences are valid.
+ * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead
+ * and use the {@link ChunkerFactory} to configure the {@link SequenceValidator}.
*/
public ChunkerME(ChunkerModel model, int beamSize,
SequenceValidator<String> sequenceValidator) {
@@ -95,7 +99,10 @@ public class ChunkerME implements Chunke
* @param beamSize The size of the beam that should be used when decoding sequences.
*/
public ChunkerME(ChunkerModel model, int beamSize) {
- this(model, beamSize, null);
+ this.model = model.getChunkerModel();
+ ChunkerContextGenerator contextGenerator = model.getFactory().getContextGenerator();
+ SequenceValidator<String> sequenceValidator = model.getFactory().getSequenceValidator();
+ beam = new BeamSearch<String>(beamSize, contextGenerator, this.model, sequenceValidator, 0);
}
/**
@@ -196,7 +203,25 @@ public class ChunkerME implements Chunke
public double[] probs() {
return bestSequence.getProbs();
}
+
+ public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
+ TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
+
+ Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+
+ EventStream es = new ChunkerEventStream(in, factory.getContextGenerator());
+ AbstractModel maxentModel = TrainUtil.train(es, mlParams.getSettings(),
+ manifestInfoEntries);
+
+ return new ChunkerModel(lang, maxentModel, manifestInfoEntries, factory);
+ }
+
+ /**
+ * @deprecated Use
+ * {@link #train(String, ObjectStream, TrainingParameters, ChunkerFactory)}
+ * instead.
+ */
public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
ChunkerContextGenerator contextGenerator, TrainingParameters mlParams)
throws IOException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Tue Oct 2 14:45:14 2012
@@ -30,6 +30,7 @@ import java.util.Map;
import opennlp.model.AbstractModel;
import opennlp.model.BinaryFileDataReader;
import opennlp.model.GenericModelReader;
+import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.model.BaseModel;
@@ -44,17 +45,33 @@ public class ChunkerModel extends BaseMo
private static final String COMPONENT_NAME = "ChunkerME";
private static final String CHUNKER_MODEL_ENTRY_NAME = "chunker.model";
+ /**
+ * @deprecated Use
+ * {@link #ChunkerModel(String, AbstractModel, Map, ChunkerFactory)}
+ * instead.
+ */
public ChunkerModel(String languageCode, AbstractModel chunkerModel, Map<String, String> manifestInfoEntries) {
-
- super(COMPONENT_NAME, languageCode, manifestInfoEntries);
-
+ this(languageCode, chunkerModel, manifestInfoEntries, new ChunkerFactory());
+ }
+
+ public ChunkerModel(String languageCode, AbstractModel chunkerModel,
+ Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
+ super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel);
-
checkArtifactMap();
}
+ /**
+ * @deprecated Use
+ * {@link #ChunkerModel(String, AbstractModel, ChunkerFactory)}
+ * instead.
+ */
public ChunkerModel(String languageCode, AbstractModel chunkerModel) {
- this(languageCode, chunkerModel, null);
+ this(languageCode, chunkerModel, null, new ChunkerFactory());
+ }
+
+ public ChunkerModel(String languageCode, AbstractModel chunkerModel, ChunkerFactory factory) {
+ this(languageCode, chunkerModel, null, factory);
}
public ChunkerModel(InputStream in) throws IOException, InvalidFormatException {
@@ -82,6 +99,16 @@ public class ChunkerModel extends BaseMo
return (AbstractModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
}
+ @Override
+ protected Class<? extends BaseToolFactory> getDefaultFactory() {
+ return ChunkerFactory.class;
+ }
+
+
+ public ChunkerFactory getFactory() {
+ return (ChunkerFactory) this.toolFactory;
+ }
+
public static void main(String[] args) throws FileNotFoundException, IOException {
if (args.length != 4){
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java Tue Oct 2 14:45:14 2012
@@ -24,6 +24,7 @@ import java.util.List;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.chunker.ChunkerCrossValidator;
import opennlp.tools.chunker.ChunkerEvaluationMonitor;
+import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.cmdline.AbstractCrossValidatorTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
@@ -67,11 +68,15 @@ public final class ChunkerCrossValidator
listeners.add(detailedFMeasureListener);
}
- ChunkerCrossValidator validator = new ChunkerCrossValidator(
- factory.getLang(), mlParams,
- listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
-
+ ChunkerCrossValidator validator;
+
try {
+ ChunkerFactory chunkerFactory = ChunkerFactory
+ .create(params.getFactory());
+
+ validator = new ChunkerCrossValidator(factory.getLang(), mlParams,
+ chunkerFactory,
+ listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
validator.evaluate(sampleStream, params.getFolds());
}
catch (IOException e) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java Tue Oct 2 14:45:14 2012
@@ -26,7 +26,6 @@ import opennlp.tools.chunker.ChunkerEval
import opennlp.tools.chunker.ChunkerEvaluator;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerSequenceValidator;
import opennlp.tools.cmdline.AbstractEvaluatorTool;
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.cmdline.TerminateToolException;
@@ -66,7 +65,7 @@ public final class ChunkerEvaluatorTool
}
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model,
- ChunkerME.DEFAULT_BEAM_SIZE, new DefaultChunkerSequenceValidator()),
+ ChunkerME.DEFAULT_BEAM_SIZE),
listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
final PerformanceMonitor monitor = new PerformanceMonitor("sent");
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java Tue Oct 2 14:45:14 2012
@@ -24,7 +24,6 @@ import java.io.InputStreamReader;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerSequenceValidator;
import opennlp.tools.cmdline.AbstractBasicCmdLineTool;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -50,8 +49,7 @@ public class ChunkerMETool extends Abstr
} else {
ChunkerModel model = new ChunkerModelLoader().load(new File(args[0]));
- ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE,
- new DefaultChunkerSequenceValidator());
+ ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);
ObjectStream<String> lineStream =
new PlainTextByLineStream(new InputStreamReader(System.in));
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Tue Oct 2 14:45:14 2012
@@ -21,9 +21,9 @@ import java.io.File;
import java.io.IOException;
import opennlp.tools.chunker.ChunkSample;
+import opennlp.tools.chunker.ChunkerFactory;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerContextGenerator;
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
@@ -63,8 +63,10 @@ public class ChunkerTrainerTool
ChunkerModel model;
try {
- model = ChunkerME.train(factory.getLang(), sampleStream,
- new DefaultChunkerContextGenerator(), mlParams);
+ ChunkerFactory chunkerFactory = ChunkerFactory
+ .create(params.getFactory());
+ model = ChunkerME.train(factory.getLang(), sampleStream, mlParams,
+ chunkerFactory);
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " +
e.getMessage(), e);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java Tue Oct 2 14:45:14 2012
@@ -17,6 +17,8 @@
package opennlp.tools.cmdline.chunker;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.params.BasicTrainingParams;
/**
@@ -26,4 +28,8 @@ import opennlp.tools.cmdline.params.Basi
*/
interface TrainingParams extends BasicTrainingParams {
+ @ParameterDescription(valueName = "factoryName", description = "A sub-class of ChunkerFactory where to get implementation and resources.")
+ @OptionalParameter
+ String getFactory();
+
}
Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java Tue Oct 2 14:45:14 2012
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelType;
+
+import org.junit.Test;
+
+/**
+ * Tests for the {@link ChunkerFactory} class.
+ */
+public class ChunkerFactoryTest {
+
+ private static ObjectStream<ChunkSample> createSampleStream()
+ throws IOException {
+ InputStream in = ChunkerFactoryTest.class.getClassLoader()
+ .getResourceAsStream("opennlp/tools/chunker/test.txt");
+ Reader sentences = new InputStreamReader(in);
+
+ ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(
+ sentences));
+ return stream;
+ }
+
+ static ChunkerModel trainModel(ModelType type, ChunkerFactory factory)
+ throws IOException {
+ return ChunkerME.train("en", createSampleStream(),
+ TrainingParameters.defaultParams(), factory);
+ }
+
+ @Test
+ public void testDefaultFactory() throws IOException {
+
+ ChunkerModel model = trainModel(ModelType.MAXENT, new ChunkerFactory());
+
+ ChunkerFactory factory = model.getFactory();
+ assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
+ assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ model.serialize(out);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+
+ ChunkerModel fromSerialized = new ChunkerModel(in);
+
+ factory = fromSerialized.getFactory();
+ assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
+ assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
+ }
+
+
+ @Test
+ public void testDummyFactory() throws IOException {
+
+ ChunkerModel model = trainModel(ModelType.MAXENT, new DummyChunkerFactory());
+
+ DummyChunkerFactory factory = (DummyChunkerFactory) model.getFactory();
+ assertTrue(factory instanceof DummyChunkerFactory);
+ assertTrue(factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
+ assertTrue(factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);
+
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ model.serialize(out);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+
+ ChunkerModel fromSerialized = new ChunkerModel(in);
+
+ factory = (DummyChunkerFactory)fromSerialized.getFactory();
+ assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
+ assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
+
+
+ ChunkerME chunker = new ChunkerME(model);
+
+ String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for",
+ "it", "to", "supply", "200", "additional", "so-called", "shipsets",
+ "for", "the", "planes", "." };
+
+ String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
+ "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
+
+
+ chunker.chunk(toks1, tags1);
+
+ }
+}
Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java Tue Oct 2 14:45:14 2012
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import opennlp.tools.util.SequenceValidator;
+
+public class DummyChunkerFactory extends ChunkerFactory {
+
+ public DummyChunkerFactory() {
+ }
+
+ @Override
+ public ChunkerContextGenerator getContextGenerator() {
+ return new DummyContextGenerator();
+ }
+
+ @Override
+ public SequenceValidator<String> getSequenceValidator() {
+ return new DummySequenceValidator();
+ }
+
+ static class DummyContextGenerator extends DefaultChunkerContextGenerator {
+
+ @Override
+ public String[] getContext(int i, String[] toks, String[] tags,
+ String[] preds) {
+ return super.getContext(i, toks, tags, preds);
+ }
+ }
+
+ static class DummySequenceValidator extends DefaultChunkerSequenceValidator {
+
+ @Override
+ public boolean validSequence(int i, String[] sequence, String[] s,
+ String outcome) {
+ return super.validSequence(i, sequence, s, outcome);
+ }
+ }
+}
Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain