You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/01/07 19:09:34 UTC

svn commit: r1056431 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ChunkerCrossValidator.java cmdline/CLI.java cmdline/chunker/ChunkerCrossValidatorTool.java

Author: colen
Date: Fri Jan  7 18:09:33 2011
New Revision: 1056431

URL: http://svn.apache.org/viewvc?rev=1056431&view=rev
Log:
OPENNLP-30: Added code for chunk cross validation

Added:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java   (with props)
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java   (with props)
Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java?rev=1056431&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java Fri Jan  7 18:09:33 2011
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.chunker;
+
+import java.io.IOException;
+
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.eval.CrossValidationPartitioner;
+import opennlp.tools.util.eval.FMeasure;
+
+/**
+ * Cross validates the chunker: for every partition produced by the
+ * partitioner a model is trained on the training samples and evaluated on
+ * the matching test samples, and all results are merged into one F-measure.
+ */
+public class ChunkerCrossValidator {
+
+  private final String languageCode;
+  private final int cutoff;
+  private final int iterations;
+
+  /** Accumulates the evaluation results of every partition processed so far. */
+  private final FMeasure fmeasure = new FMeasure();
+
+  /**
+   * Creates a cross validator which trains with the given settings.
+   *
+   * @param languageCode language code handed to the chunker trainer
+   * @param cutoff cutoff handed to the chunker trainer
+   * @param iterations number of iterations handed to the chunker trainer
+   */
+  public ChunkerCrossValidator(String languageCode, int cutoff, int iterations) {
+    this.languageCode = languageCode;
+    this.cutoff = cutoff;
+    this.iterations = iterations;
+  }
+
+  /**
+   * Trains and evaluates one chunker model per partition of the samples and
+   * merges each evaluation result into the F-measure of this validator.
+   *
+   * @param samples the samples which are split into training and test data
+   * @param nFolds number of folds handed to the partitioner
+   *
+   * @throws IOException declared by the original signature and kept as is
+   * @throws InvalidFormatException declared by the original signature and kept as is
+   */
+  public void evaluate(ObjectStream<ChunkSample> samples, int nFolds)
+      throws IOException, InvalidFormatException {
+    CrossValidationPartitioner<ChunkSample> partitioner =
+        new CrossValidationPartitioner<ChunkSample>(samples, nFolds);
+
+    while (partitioner.hasNext()) {
+
+      CrossValidationPartitioner.TrainingSampleStream<ChunkSample> trainingSampleStream =
+          partitioner.next();
+
+      ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
+          cutoff, iterations);
+
+      // Evaluate the freshly trained model on the test samples of this partition.
+      ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model));
+
+      evaluator.evaluate(trainingSampleStream.getTestSampleStream());
+
+      fmeasure.mergeInto(evaluator.getFMeasure());
+    }
+  }
+
+  /**
+   * Retrieves the F-measure merged over all partitions evaluated so far.
+   *
+   * @return the accumulated F-measure
+   */
+  public FMeasure getFMeasure() {
+    return fmeasure;
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java?rev=1056431&r1=1056430&r2=1056431&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java Fri Jan  7 18:09:33 2011
@@ -26,6 +26,7 @@ import java.util.Map;
 import java.util.Set;
 
 import opennlp.tools.cmdline.chunker.ChunkerConverterTool;
+import opennlp.tools.cmdline.chunker.ChunkerCrossValidatorTool;
 import opennlp.tools.cmdline.chunker.ChunkerEvaluatorTool;
 import opennlp.tools.cmdline.chunker.ChunkerMETool;
 import opennlp.tools.cmdline.chunker.ChunkerTrainerTool;
@@ -103,6 +104,7 @@ public final class CLI {
     tools.add(new ChunkerMETool());
     tools.add(new ChunkerTrainerTool());
     tools.add(new ChunkerEvaluatorTool());
+    tools.add(new ChunkerCrossValidatorTool());
     tools.add(new ChunkerConverterTool());
     
     // Parser

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java?rev=1056431&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java Fri Jan  7 18:09:33 2011
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.chunker;
+
+import java.io.File;
+import java.io.IOException;
+
+import opennlp.tools.chunker.ChunkSample;
+import opennlp.tools.chunker.ChunkerCrossValidator;
+import opennlp.tools.cmdline.BasicTrainingParameters;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.parser.TrainingParameters;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.eval.FMeasure;
+
+/**
+ * Command line tool which runs a 10-fold cross validation of the chunker on
+ * the given training data and prints the merged F-measure to standard out.
+ */
+public final class ChunkerCrossValidatorTool implements CmdLineTool {
+
+  /** Returns the name under which this tool is invoked. */
+  public String getName() {
+    return "ChunkerCrossValidator";
+  }
+
+  /** Returns a one line summary of what this tool does. */
+  public String getShortDescription() {
+    return "10-fold cross validator for the chunker";
+  }
+
+  /**
+   * Builds the usage message printed when the arguments are not accepted.
+   *
+   * NOTE(review): the usage text is taken from the parser's TrainingParameters
+   * while run() parses the arguments with BasicTrainingParameters; confirm
+   * that both describe the same options.
+   */
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " " + TrainingParameters.getParameterUsage() + "\n"+
+        BasicTrainingParameters.getDescription() + "\n"+
+        "-data trainingData      training data used for cross validation";
+  }
+
+  /**
+   * Validates the arguments, cross validates the chunker on the training data
+   * with 10 folds and prints the resulting F-measure.
+   *
+   * @param args the command line arguments of this tool
+   *
+   * @throws TerminateToolException if the arguments are rejected or an
+   *     IOException occurs during the evaluation
+   */
+  public void run(String[] args) {
+    // NOTE(review): six presumably stands for three mandatory option/value
+    // pairs; confirm against BasicTrainingParameters.
+    if (args.length < 6) {
+      System.out.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    BasicTrainingParameters parameters = new BasicTrainingParameters(args);
+
+    if (!parameters.isValid()) {
+      System.out.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    // Nothing above guarantees that -data was passed. Assuming getParameter
+    // yields null for an absent option (TODO confirm), print the help instead
+    // of letting new File(null) fail with a NullPointerException.
+    String trainingDataPath = CmdLineUtil.getParameter("-data", args);
+
+    if (trainingDataPath == null) {
+      System.out.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    File trainingDataInFile = new File(trainingDataPath);
+    CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
+
+    ObjectStream<ChunkSample> sampleStream =
+        ChunkerTrainerTool.openSampleData("Training Data",
+        trainingDataInFile, parameters.getEncoding());
+
+    ChunkerCrossValidator validator =
+        new ChunkerCrossValidator(
+            parameters.getLanguage(), parameters.getCutoff(), parameters.getNumberOfIterations());
+
+    try {
+      validator.evaluate(sampleStream, 10);
+    }
+    catch (IOException e) {
+      CmdLineUtil.printTrainingIoError(e);
+      throw new TerminateToolException(-1);
+    }
+    finally {
+      try {
+        sampleStream.close();
+      } catch (IOException e) {
+        // Deliberately ignored: closing is best effort and must not hide
+        // the outcome of the evaluation.
+      }
+    }
+
+    FMeasure result = validator.getFMeasure();
+
+    System.out.println(result.toString());
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain