You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/12 20:24:17 UTC

svn commit: r1145704 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind: TokenNameFinderCrossValidatorTool.java TokenNameFinderTrainerTool.java TrainingParametersI.java

Author: colen
Date: Tue Jul 12 18:24:17 2011
New Revision: 1145704

URL: http://svn.apache.org/viewvc?rev=1145704&view=rev
Log:
OPENNLP-221 Refactored Name Finder cross validator to use Parameters

Added:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java   (with props)
Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1145704&r1=1145703&r2=1145704&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Tue Jul 12 18:24:17 2011
@@ -19,8 +19,11 @@ package opennlp.tools.cmdline.namefind;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.charset.Charset;
 import java.util.Map;
 
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.BasicCrossValidatorParameters;
 import opennlp.tools.cmdline.CLI;
 import opennlp.tools.cmdline.CmdLineTool;
 import opennlp.tools.cmdline.CmdLineUtil;
@@ -30,6 +33,10 @@ import opennlp.tools.namefind.TokenNameF
 import opennlp.tools.util.ObjectStream;
 
 public final class TokenNameFinderCrossValidatorTool implements CmdLineTool {
+  
+  interface Parameters extends TrainingParametersI, BasicCrossValidatorParameters{
+    // Declares no options of its own; it only combines the two inherited parameter sets.
+  }
 
   public String getName() {
     return "TokenNameFinderCrossValidator";
@@ -46,44 +53,39 @@ public final class TokenNameFinderCrossV
   }
 
   public void run(String[] args) {
-    if (args.length < 6) {
-      System.out.println(getHelp());
-      throw new TerminateToolException(1);
-    }
-
-    TrainingParameters parameters = new TrainingParameters(args);
-
-    if (!parameters.isValid()) {
-      System.out.println(getHelp());
+    if (!ArgumentParser.validateArguments(args, Parameters.class)) {
+      System.err.println(getHelp());
       throw new TerminateToolException(1);
     }
+    
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
 
     opennlp.tools.util.TrainingParameters mlParams = CmdLineUtil
-        .loadTrainingParameters(CmdLineUtil.getParameter("-params", args),
-            false);
+        .loadTrainingParameters(params.getParams(),false);
 
     byte featureGeneratorBytes[] = TokenNameFinderTrainerTool
-        .openFeatureGeneratorBytes(parameters.getFeatureGenDescriptorFile());
+        .openFeatureGeneratorBytes(params.getFeaturegen());
 
     Map<String, Object> resources = TokenNameFinderTrainerTool
-        .loadResources(parameters.getResourceDirectory());
+        .loadResources(params.getResources());
 
-    File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
+    File trainingDataInFile = params.getData();
     CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
+    
+    Charset encoding = params.getEncoding();
 
     ObjectStream<NameSample> sampleStream = TokenNameFinderTrainerTool
-        .openSampleData("Training Data", trainingDataInFile,
-            parameters.getEncoding());
+        .openSampleData("Training Data", trainingDataInFile, encoding);
 
     TokenNameFinderCrossValidator validator;
 
     try {
       if (mlParams == null) {
-        validator = new TokenNameFinderCrossValidator(parameters.getLanguage(), parameters.getType(),
-             featureGeneratorBytes, resources, parameters.getNumberOfIterations(),
-            parameters.getCutoff());
+        validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(),
+             featureGeneratorBytes, resources, params.getIterations(),
+            params.getCutoff());
       } else {
-        validator = new TokenNameFinderCrossValidator(parameters.getLanguage(), parameters.getType(), mlParams,
+        validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(), mlParams,
             featureGeneratorBytes, resources);
       }
       validator.evaluate(sampleStream, 10);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1145704&r1=1145703&r2=1145704&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Tue Jul 12 18:24:17 2011
@@ -22,7 +22,6 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -68,11 +67,14 @@ public final class TokenNameFinderTraine
   }
   
   static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) {
+    return openFeatureGeneratorBytes(featureGenDescriptorFile == null ? null : new File(featureGenDescriptorFile)); // wrap in File (null stays null) so the File overload runs; passing the String on would recurse forever
+  }
+  
+  static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
     byte featureGeneratorBytes[] = null;
     // load descriptor file into memory
     if (featureGenDescriptorFile != null) {
-      InputStream bytesIn = CmdLineUtil.openInFile(new File(
-          featureGenDescriptorFile));
+      InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile);
 
       try {
         featureGeneratorBytes = ModelUtil.read(bytesIn);
@@ -90,16 +92,14 @@ public final class TokenNameFinderTraine
     return featureGeneratorBytes;
   }
   
-  static Map<String, Object> loadResources(String resourceDirectory) {
+  static Map<String, Object> loadResources(File resourcePath) {
     Map<String, Object> resources = new HashMap<String, Object>();
 
-    if (resourceDirectory != null) {
+    if (resourcePath != null) {
 
       Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel
           .createArtifactSerializers();
 
-      File resourcePath = new File(resourceDirectory);
-
       File resourceFiles[] = resourcePath.listFiles();
 
       // TODO: Filter files, also files with start with a dot
@@ -144,10 +144,19 @@ public final class TokenNameFinderTraine
         }
       }
     }
-
     return resources;
   }
   
+  static Map<String, Object> loadResources(String resourceDirectory) {
+    // Without a directory there is nothing to load: hand back a fresh, empty map.
+    if (resourceDirectory == null) {
+      return new HashMap<String, Object>();
+    }
+
+    // Otherwise delegate to the File based overload.
+    return loadResources(new File(resourceDirectory));
+  }
+  
   public void run(String[] args) {
     
     if (args.length < 8) {

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java?rev=1145704&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java Tue Jul 12 18:24:17 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.namefind;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.BasicTrainingParametersI;
+
+/**
+ * Name Finder training arguments: adds an optional model type, resources directory and
+ * feature generator descriptor file to the options inherited from BasicTrainingParametersI.
+ * Note: Do not use this interface, internal use only!
+ */
+interface TrainingParametersI extends BasicTrainingParametersI {
+  
+  @ParameterDescription(valueName = "modelType", description = "The type of the token name finder model")
+  @OptionalParameter(defaultValue = "default")
+  String getType();
+  
+  @ParameterDescription(valueName = "resourcesDir", description = "The resources directory")
+  @OptionalParameter
+  File getResources();
+  
+  @ParameterDescription(valueName = "featuregenFile", description = "The feature generator descriptor file")
+  @OptionalParameter
+  File getFeaturegen();  
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain