You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/12 20:24:17 UTC
svn commit: r1145704 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind:
TokenNameFinderCrossValidatorTool.java TokenNameFinderTrainerTool.java
TrainingParametersI.java
Author: colen
Date: Tue Jul 12 18:24:17 2011
New Revision: 1145704
URL: http://svn.apache.org/viewvc?rev=1145704&view=rev
Log:
OPENNLP-221 Refactored Name Finder cross validator to use Parameters
Added:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java (with props)
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1145704&r1=1145703&r2=1145704&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Tue Jul 12 18:24:17 2011
@@ -19,8 +19,11 @@ package opennlp.tools.cmdline.namefind;
import java.io.File;
import java.io.IOException;
+import java.nio.charset.Charset;
import java.util.Map;
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.BasicCrossValidatorParameters;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -30,6 +33,10 @@ import opennlp.tools.namefind.TokenNameF
import opennlp.tools.util.ObjectStream;
public final class TokenNameFinderCrossValidatorTool implements CmdLineTool {
+
+ /**
+  * Command line parameters accepted by this tool: everything declared by
+  * {@link TrainingParametersI} combined with everything declared by
+  * {@link BasicCrossValidatorParameters}. The interface declares no members
+  * of its own; {@link #run(String[])} passes it to {@link ArgumentParser}
+  * to validate and parse the raw argument array.
+  */
+ interface Parameters extends TrainingParametersI, BasicCrossValidatorParameters{
+
+ }
public String getName() {
return "TokenNameFinderCrossValidator";
@@ -46,44 +53,39 @@ public final class TokenNameFinderCrossV
}
public void run(String[] args) {
- if (args.length < 6) {
- System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
-
- TrainingParameters parameters = new TrainingParameters(args);
-
- if (!parameters.isValid()) {
- System.out.println(getHelp());
+ if (!ArgumentParser.validateArguments(args, Parameters.class)) {
+ System.err.println(getHelp());
throw new TerminateToolException(1);
}
+
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
opennlp.tools.util.TrainingParameters mlParams = CmdLineUtil
- .loadTrainingParameters(CmdLineUtil.getParameter("-params", args),
- false);
+ .loadTrainingParameters(params.getParams(),false);
byte featureGeneratorBytes[] = TokenNameFinderTrainerTool
- .openFeatureGeneratorBytes(parameters.getFeatureGenDescriptorFile());
+ .openFeatureGeneratorBytes(params.getFeaturegen());
Map<String, Object> resources = TokenNameFinderTrainerTool
- .loadResources(parameters.getResourceDirectory());
+ .loadResources(params.getResources());
- File trainingDataInFile = new File(CmdLineUtil.getParameter("-data", args));
+ File trainingDataInFile = params.getData();
CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
+
+ Charset encoding = params.getEncoding();
ObjectStream<NameSample> sampleStream = TokenNameFinderTrainerTool
- .openSampleData("Training Data", trainingDataInFile,
- parameters.getEncoding());
+ .openSampleData("Training Data", trainingDataInFile, encoding);
TokenNameFinderCrossValidator validator;
try {
if (mlParams == null) {
- validator = new TokenNameFinderCrossValidator(parameters.getLanguage(), parameters.getType(),
- featureGeneratorBytes, resources, parameters.getNumberOfIterations(),
- parameters.getCutoff());
+ validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(),
+ featureGeneratorBytes, resources, params.getIterations(),
+ params.getCutoff());
} else {
- validator = new TokenNameFinderCrossValidator(parameters.getLanguage(), parameters.getType(), mlParams,
+ validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(), mlParams,
featureGeneratorBytes, resources);
}
validator.evaluate(sampleStream, 10);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1145704&r1=1145703&r2=1145704&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Tue Jul 12 18:24:17 2011
@@ -22,7 +22,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
-import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -68,11 +67,14 @@ public final class TokenNameFinderTraine
}
static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) {
+ return openFeatureGeneratorBytes(featureGenDescriptorFile);
+ }
+
+ static byte[] openFeatureGeneratorBytes(File featureGenDescriptorFile) {
byte featureGeneratorBytes[] = null;
// load descriptor file into memory
if (featureGenDescriptorFile != null) {
- InputStream bytesIn = CmdLineUtil.openInFile(new File(
- featureGenDescriptorFile));
+ InputStream bytesIn = CmdLineUtil.openInFile(featureGenDescriptorFile);
try {
featureGeneratorBytes = ModelUtil.read(bytesIn);
@@ -90,16 +92,14 @@ public final class TokenNameFinderTraine
return featureGeneratorBytes;
}
- static Map<String, Object> loadResources(String resourceDirectory) {
+ static Map<String, Object> loadResources(File resourcePath) {
Map<String, Object> resources = new HashMap<String, Object>();
- if (resourceDirectory != null) {
+ if (resourcePath != null) {
Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel
.createArtifactSerializers();
- File resourcePath = new File(resourceDirectory);
-
File resourceFiles[] = resourcePath.listFiles();
// TODO: Filter files, also files with start with a dot
@@ -144,10 +144,19 @@ public final class TokenNameFinderTraine
}
}
}
-
return resources;
}
+ /**
+  * Loads the name finder resources from a directory given as a path string.
+  *
+  * @param resourceDirectory path of the resources directory, or
+  *        <code>null</code> if none was specified
+  * @return a new empty map when <code>resourceDirectory</code> is
+  *         <code>null</code>, otherwise the result of
+  *         {@link #loadResources(File)} for that directory
+  */
+ static Map<String, Object> loadResources(String resourceDirectory) {
+ // Guard clause: nothing to load without a directory.
+ if (resourceDirectory == null) {
+ return new HashMap<String, Object>();
+ }
+
+ return loadResources(new File(resourceDirectory));
+ }
+
public void run(String[] args) {
if (args.length < 8) {
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java?rev=1145704&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java Tue Jul 12 18:24:17 2011
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.namefind;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.BasicTrainingParametersI;
+
+/**
+ * Command line training parameters specific to the Name Finder tools,
+ * declared in addition to those inherited from
+ * {@link BasicTrainingParametersI}. Each getter carries the
+ * {@link ParameterDescription} and {@link OptionalParameter} annotations
+ * defined by <code>opennlp.tools.cmdline.ArgumentParser</code>.
+ *
+ * Note: Do not use this interface, internal use only!
+ */
+interface TrainingParametersI extends BasicTrainingParametersI {
+
+ /**
+  * The type of the token name finder model. The parameter is optional and
+  * is declared with the default value "default".
+  */
+ @ParameterDescription(valueName = "modelType", description = "The type of the token name finder model")
+ @OptionalParameter(defaultValue = "default")
+ String getType();
+
+ /**
+  * The resources directory. The parameter is optional and declares no
+  * default value.
+  * NOTE(review): presumably <code>null</code> when the option is omitted
+  * (the consumers in this package null-check it) - confirm against
+  * ArgumentParser.
+  */
+ @ParameterDescription(valueName = "resourcesDir", description = "The resources directory")
+ @OptionalParameter
+ File getResources();
+
+ /**
+  * The feature generator descriptor file. The parameter is optional and
+  * declares no default value.
+  * NOTE(review): presumably <code>null</code> when the option is omitted
+  * (the consumers in this package null-check it) - confirm against
+  * ArgumentParser.
+  */
+ @ParameterDescription(valueName = "featuregenFile", description = "The feature generator descriptor file")
+ @OptionalParameter
+ File getFeaturegen();
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TrainingParametersI.java
------------------------------------------------------------------------------
svn:mime-type = text/plain