You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/10/02 16:45:15 UTC

svn commit: r1392937 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/chunker/ main/java/opennlp/tools/cmdline/chunker/ test/java/opennlp/tools/chunker/

Author: colen
Date: Tue Oct  2 14:45:14 2012
New Revision: 1392937

URL: http://svn.apache.org/viewvc?rev=1392937&view=rev
Log:
OPENNLP-539: Implemented customization factory for the Chunker

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java   (with props)
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java   (with props)
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java   (with props)
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java Tue Oct  2 14:45:14 2012
@@ -33,10 +33,12 @@ public class ChunkerCrossValidator {
 
   private FMeasure fmeasure = new FMeasure();
   private ChunkerEvaluationMonitor[] listeners;
+  private ChunkerFactory chunkerFactory = new ChunkerFactory(); // default for the deprecated constructors, which never set it
 
   /**
-   * @deprecated use {@link ChunkerCrossValidator#ChunkerCrossValidator(String, TrainingParameters, ChunkerEvaluationMonitor...)}
-   * instead and pass in a TrainingParameters object.
+   * @deprecated Use
+   *             {@link #ChunkerCrossValidator(String, TrainingParameters, ChunkerFactory, ChunkerEvaluationMonitor...)}
+   *             instead.
    */
   @Deprecated
   public ChunkerCrossValidator(String languageCode, int cutoff, int iterations) {
@@ -47,6 +49,9 @@ public class ChunkerCrossValidator {
     listeners = null;
   }
 
+  /**
+   * @deprecated Use {@link #ChunkerCrossValidator(String, TrainingParameters, ChunkerFactory, ChunkerEvaluationMonitor...)} instead. 
+   */
   public ChunkerCrossValidator(String languageCode, TrainingParameters params,
       ChunkerEvaluationMonitor... listeners) {
 
@@ -54,6 +59,14 @@ public class ChunkerCrossValidator {
     this.params = params;
     this.listeners = listeners;
   }
+  
+  public ChunkerCrossValidator(String languageCode, TrainingParameters params,
+      ChunkerFactory factory, ChunkerEvaluationMonitor... listeners) {
+    this.chunkerFactory = factory;
+    this.languageCode = languageCode;
+    this.params = params;
+    this.listeners = listeners;
+  }
 
   /**
    * Starts the evaluation.
@@ -76,12 +89,11 @@ public class ChunkerCrossValidator {
           .next();
 
       ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
-          new DefaultChunkerContextGenerator(), params);
+          params, chunkerFactory);
 
       // do testing
       ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model,
-          ChunkerME.DEFAULT_BEAM_SIZE, new DefaultChunkerSequenceValidator()),
-          listeners);
+          ChunkerME.DEFAULT_BEAM_SIZE), listeners);
 
       evaluator.evaluate(trainingSampleStream.getTestSampleStream());
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java Tue Oct  2 14:45:14 2012
@@ -48,6 +48,8 @@ public class ChunkerEventStream extends 
   /**
    * Creates a new event stream based on the specified data stream.
    * @param d The data stream for this event stream.
+   * 
+   * @deprecated Use {@link #ChunkerEventStream(ObjectStream, ChunkerContextGenerator)} instead.
    */
   public ChunkerEventStream(ObjectStream<ChunkSample> d) {
     this(d, new DefaultChunkerContextGenerator());

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java Tue Oct  2 14:45:14 2012
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.SequenceValidator;
+import opennlp.tools.util.ext.ExtensionLoader;
+
+public class ChunkerFactory extends BaseToolFactory {
+
+  /** Creates a factory that provides the default implementation of the resources. */
+  public ChunkerFactory() {
+  }
+
+  /**
+   * Creates the factory named by {@code subclassName}.
+   * @param subclassName factory class name; {@code null} selects the default factory
+   * @throws InvalidFormatException if the named factory cannot be instantiated
+   */
+  public static ChunkerFactory create(String subclassName)
+      throws InvalidFormatException {
+    if (subclassName == null) {
+      return new ChunkerFactory();
+    }
+    try {
+      return ExtensionLoader.instantiateExtension(ChunkerFactory.class, subclassName);
+    } catch (Exception e) {
+      // Not printed here: the cause is chained, so the caller decides how to report it.
+      String msg = "Could not instantiate the " + subclassName
+          + ". The initialization throw an exception.";
+      throw new InvalidFormatException(msg, e);
+    }
+  }
+
+  @Override
+  public void validateArtifactMap() throws InvalidFormatException {
+    // no additional artifacts
+  }
+
+  /** Returns a new {@link DefaultChunkerSequenceValidator}; subclasses may override. */
+  public SequenceValidator<String> getSequenceValidator() {
+    return new DefaultChunkerSequenceValidator();
+  }
+
+  /** Returns a new {@link DefaultChunkerContextGenerator}; subclasses may override. */
+  public ChunkerContextGenerator getContextGenerator() {
+    return new DefaultChunkerContextGenerator();
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java Tue Oct  2 14:45:14 2012
@@ -64,6 +64,8 @@ public class ChunkerME implements Chunke
    * @param sequenceValidator  The {@link SequenceValidator} to determines whether the outcome 
    *        is valid for the preceding sequence. This can be used to implement constraints 
    *        on what sequences are valid.
+   * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead 
+   *    and use the {@link ChunkerFactory} to configure the {@link SequenceValidator} and {@link ChunkerContextGenerator}.
    */
   public ChunkerME(ChunkerModel model, int beamSize, SequenceValidator<String> sequenceValidator,
       ChunkerContextGenerator contextGenerator) {
@@ -80,6 +82,8 @@ public class ChunkerME implements Chunke
    * @param sequenceValidator  The {@link SequenceValidator} to determines whether the outcome 
    *        is valid for the preceding sequence. This can be used to implement constraints 
    *        on what sequences are valid.
+   * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead 
+   *    and use the {@link ChunkerFactory} to configure the {@link SequenceValidator}.
    */
   public ChunkerME(ChunkerModel model, int beamSize,
       SequenceValidator<String> sequenceValidator) {
@@ -95,7 +99,10 @@ public class ChunkerME implements Chunke
    * @param beamSize The size of the beam that should be used when decoding sequences.
    */
   public ChunkerME(ChunkerModel model, int beamSize) {
-    this(model, beamSize, null);
+    this.model = model.getChunkerModel();
+    ChunkerContextGenerator contextGenerator = model.getFactory().getContextGenerator();
+    SequenceValidator<String> sequenceValidator = model.getFactory().getSequenceValidator();
+    beam = new BeamSearch<String>(beamSize, contextGenerator, this.model, sequenceValidator, 0);
   }
   
   /**
@@ -196,7 +203,25 @@ public class ChunkerME implements Chunke
   public double[] probs() {
     return bestSequence.getProbs();
   }
+  
+  public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
+      TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
+
+    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
+
+    EventStream es = new ChunkerEventStream(in, factory.getContextGenerator());
 
+    AbstractModel maxentModel = TrainUtil.train(es, mlParams.getSettings(),
+        manifestInfoEntries);
+
+    return new ChunkerModel(lang, maxentModel, manifestInfoEntries, factory);
+  }
+
+  /**
+   * @deprecated Use
+   *             {@link #train(String, ObjectStream, TrainingParameters, ChunkerFactory)}
+   *             instead.
+   */
   public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in, 
       ChunkerContextGenerator contextGenerator, TrainingParameters mlParams)
   throws IOException {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Tue Oct  2 14:45:14 2012
@@ -30,6 +30,7 @@ import java.util.Map;
 import opennlp.model.AbstractModel;
 import opennlp.model.BinaryFileDataReader;
 import opennlp.model.GenericModelReader;
+import opennlp.tools.util.BaseToolFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.BaseModel;
 
@@ -44,17 +45,33 @@ public class ChunkerModel extends BaseMo
   private static final String COMPONENT_NAME = "ChunkerME";
   private static final String CHUNKER_MODEL_ENTRY_NAME = "chunker.model";
 
+  /**
+   * @deprecated Use
+   *             {@link #ChunkerModel(String, AbstractModel, Map, ChunkerFactory)}
+   *             instead.
+   */
   public ChunkerModel(String languageCode, AbstractModel chunkerModel, Map<String, String> manifestInfoEntries) {
-
-    super(COMPONENT_NAME, languageCode, manifestInfoEntries);
-
+    this(languageCode, chunkerModel, manifestInfoEntries, new ChunkerFactory());
+  }
+  
+  public ChunkerModel(String languageCode, AbstractModel chunkerModel,
+      Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
+    super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
     artifactMap.put(CHUNKER_MODEL_ENTRY_NAME, chunkerModel);
-
     checkArtifactMap();
   }
   
+  /**
+   * @deprecated Use
+   *             {@link #ChunkerModel(String, AbstractModel, ChunkerFactory)}
+   *             instead.
+   */
   public ChunkerModel(String languageCode, AbstractModel chunkerModel) {
-    this(languageCode, chunkerModel, null);
+    this(languageCode, chunkerModel, null, new ChunkerFactory());
+  }
+
+  public ChunkerModel(String languageCode, AbstractModel chunkerModel, ChunkerFactory factory) {
+    this(languageCode, chunkerModel, null, factory);
   }
   
   public ChunkerModel(InputStream in) throws IOException, InvalidFormatException {
@@ -82,6 +99,16 @@ public class ChunkerModel extends BaseMo
     return (AbstractModel) artifactMap.get(CHUNKER_MODEL_ENTRY_NAME);
   }
   
+  @Override
+  protected Class<? extends BaseToolFactory> getDefaultFactory() {
+    return ChunkerFactory.class;
+  }
+
+  
+  public ChunkerFactory getFactory() {
+    return (ChunkerFactory) this.toolFactory;
+  }
+  
   public static void main(String[] args) throws FileNotFoundException, IOException {
     
     if (args.length != 4){

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java Tue Oct  2 14:45:14 2012
@@ -24,6 +24,7 @@ import java.util.List;
 import opennlp.tools.chunker.ChunkSample;
 import opennlp.tools.chunker.ChunkerCrossValidator;
 import opennlp.tools.chunker.ChunkerEvaluationMonitor;
+import opennlp.tools.chunker.ChunkerFactory;
 import opennlp.tools.cmdline.AbstractCrossValidatorTool;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
@@ -67,11 +68,15 @@ public final class ChunkerCrossValidator
       listeners.add(detailedFMeasureListener);
     }
 
-    ChunkerCrossValidator validator = new ChunkerCrossValidator(
-        factory.getLang(), mlParams,
-        listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
-      
+    ChunkerCrossValidator validator;
+
     try {
+      ChunkerFactory chunkerFactory = ChunkerFactory
+          .create(params.getFactory());
+
+      validator = new ChunkerCrossValidator(factory.getLang(), mlParams,
+          chunkerFactory,
+          listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
       validator.evaluate(sampleStream, params.getFolds());
     }
     catch (IOException e) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerEvaluatorTool.java Tue Oct  2 14:45:14 2012
@@ -26,7 +26,6 @@ import opennlp.tools.chunker.ChunkerEval
 import opennlp.tools.chunker.ChunkerEvaluator;
 import opennlp.tools.chunker.ChunkerME;
 import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerSequenceValidator;
 import opennlp.tools.cmdline.AbstractEvaluatorTool;
 import opennlp.tools.cmdline.PerformanceMonitor;
 import opennlp.tools.cmdline.TerminateToolException;
@@ -66,7 +65,7 @@ public final class ChunkerEvaluatorTool
     }
 
     ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model,
-        ChunkerME.DEFAULT_BEAM_SIZE, new DefaultChunkerSequenceValidator()),
+        ChunkerME.DEFAULT_BEAM_SIZE),
         listeners.toArray(new ChunkerEvaluationMonitor[listeners.size()]));
     
     final PerformanceMonitor monitor = new PerformanceMonitor("sent");

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java Tue Oct  2 14:45:14 2012
@@ -24,7 +24,6 @@ import java.io.InputStreamReader;
 import opennlp.tools.chunker.ChunkSample;
 import opennlp.tools.chunker.ChunkerME;
 import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerSequenceValidator;
 import opennlp.tools.cmdline.AbstractBasicCmdLineTool;
 import opennlp.tools.cmdline.CLI;
 import opennlp.tools.cmdline.CmdLineUtil;
@@ -50,8 +49,7 @@ public class ChunkerMETool extends Abstr
     } else {
       ChunkerModel model = new ChunkerModelLoader().load(new File(args[0]));
 
-      ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE,
-          new DefaultChunkerSequenceValidator());
+      ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);
 
       ObjectStream<String> lineStream =
         new PlainTextByLineStream(new InputStreamReader(System.in));

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java Tue Oct  2 14:45:14 2012
@@ -21,9 +21,9 @@ import java.io.File;
 import java.io.IOException;
 
 import opennlp.tools.chunker.ChunkSample;
+import opennlp.tools.chunker.ChunkerFactory;
 import opennlp.tools.chunker.ChunkerME;
 import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.chunker.DefaultChunkerContextGenerator;
 import opennlp.tools.cmdline.AbstractTrainerTool;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
@@ -63,8 +63,10 @@ public class ChunkerTrainerTool
 
     ChunkerModel model;
     try {
-      model = ChunkerME.train(factory.getLang(), sampleStream,
-          new DefaultChunkerContextGenerator(), mlParams);
+      ChunkerFactory chunkerFactory = ChunkerFactory
+          .create(params.getFactory());
+      model = ChunkerME.train(factory.getLang(), sampleStream, mlParams,
+          chunkerFactory);
     } catch (IOException e) {
       throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " +
           e.getMessage(), e);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java?rev=1392937&r1=1392936&r2=1392937&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/TrainingParams.java Tue Oct  2 14:45:14 2012
@@ -17,6 +17,8 @@
 
 package opennlp.tools.cmdline.chunker;
 
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.params.BasicTrainingParams;
 
 /**
@@ -26,4 +28,8 @@ import opennlp.tools.cmdline.params.Basi
  */
 interface TrainingParams extends BasicTrainingParams {
   
+  @ParameterDescription(valueName = "factoryName", description = "A sub-class of ChunkerFactory where to get implementation and resources.")
+  @OptionalParameter
+  String getFactory();
+  
 }

Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java Tue Oct  2 14:45:14 2012
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import static org.junit.Assert.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelType;
+
+import org.junit.Test;
+
+/**
+ * Tests for the {@link ChunkerFactory} class.
+ */
+public class ChunkerFactoryTest {
+
+  /** Opens the bundled {@code test.txt} chunker corpus as a sample stream. */
+  private static ObjectStream<ChunkSample> createSampleStream()
+      throws IOException {
+    InputStream in = ChunkerFactoryTest.class.getClassLoader()
+        .getResourceAsStream("opennlp/tools/chunker/test.txt");
+    Reader sentences = new InputStreamReader(in);
+
+    ChunkSampleStream stream = new ChunkSampleStream(new PlainTextByLineStream(
+        sentences));
+    return stream;
+  }
+
+  /** Trains an "en" model with default parameters; {@code type} is currently unused. */
+  static ChunkerModel trainModel(ModelType type, ChunkerFactory factory)
+      throws IOException {
+    return ChunkerME.train("en", createSampleStream(),
+        TrainingParameters.defaultParams(), factory);
+  }
+
+  /** The default factory must supply the default components before and after a round trip. */
+  @Test
+  public void testDefaultFactory() throws IOException {
+    ChunkerModel model = trainModel(ModelType.MAXENT, new ChunkerFactory());
+
+    ChunkerFactory factory = model.getFactory();
+    assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
+    assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    model.serialize(out);
+    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+
+    ChunkerModel fromSerialized = new ChunkerModel(in);
+
+    factory = fromSerialized.getFactory();
+    assertTrue(factory.getContextGenerator() instanceof DefaultChunkerContextGenerator);
+    assertTrue(factory.getSequenceValidator() instanceof DefaultChunkerSequenceValidator);
+  }
+
+  /** A custom factory must be restored, with its own components, after a round trip. */
+  @Test
+  public void testDummyFactory() throws IOException {
+    ChunkerModel model = trainModel(ModelType.MAXENT, new DummyChunkerFactory());
+
+    DummyChunkerFactory factory = (DummyChunkerFactory) model.getFactory();
+    assertTrue(factory instanceof DummyChunkerFactory);
+    assertTrue(factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
+    assertTrue(factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    model.serialize(out);
+    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+
+    ChunkerModel fromSerialized = new ChunkerModel(in);
+
+    // Assert the Dummy subtypes: the Dummy classes extend the Default ones, so an
+    // instanceof check against the Default types would pass for either factory.
+    factory = (DummyChunkerFactory) fromSerialized.getFactory();
+    assertTrue(factory.getContextGenerator() instanceof DummyChunkerFactory.DummyContextGenerator);
+    assertTrue(factory.getSequenceValidator() instanceof DummyChunkerFactory.DummySequenceValidator);
+
+    // Smoke test: chunking with the trained model must not throw.
+    ChunkerME chunker = new ChunkerME(model);
+
+    String[] toks1 = { "Rockwell", "said", "the", "agreement", "calls", "for",
+        "it", "to", "supply", "200", "additional", "so-called", "shipsets",
+        "for", "the", "planes", "." };
+
+    String[] tags1 = { "NNP", "VBD", "DT", "NN", "VBZ", "IN", "PRP", "TO", "VB",
+        "CD", "JJ", "JJ", "NNS", "IN", "DT", "NNS", "." };
+
+    chunker.chunk(toks1, tags1);
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java?rev=1392937&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java Tue Oct  2 14:45:14 2012
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import opennlp.tools.util.SequenceValidator;
+
+/** Test factory that swaps in marker subclasses of the default chunker components. */
+public class DummyChunkerFactory extends ChunkerFactory {
+
+  public DummyChunkerFactory() {
+  }
+
+  @Override
+  public SequenceValidator<String> getSequenceValidator() {
+    return new DummySequenceValidator();
+  }
+
+  @Override
+  public ChunkerContextGenerator getContextGenerator() {
+    return new DummyContextGenerator();
+  }
+
+  /** Validator that defers every decision to {@link DefaultChunkerSequenceValidator}. */
+  static class DummySequenceValidator extends DefaultChunkerSequenceValidator {
+    @Override
+    public boolean validSequence(int i, String[] sequence, String[] s, String outcome) {
+      return super.validSequence(i, sequence, s, outcome);
+    }
+  }
+
+  /** Context generator that defers to {@link DefaultChunkerContextGenerator}. */
+  static class DummyContextGenerator extends DefaultChunkerContextGenerator {
+    @Override
+    public String[] getContext(int i, String[] toks, String[] tags,
+        String[] preds) {
+      return super.getContext(i, toks, tags, preds);
+    }
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/DummyChunkerFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain