You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2011/07/19 17:38:52 UTC

svn commit: r1148403 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline: CLI.java dictionary/ dictionary/AbbreviationDictionaryBuilderTool.java dictionary/DictionaryBuilderParams.java

Author: colen
Date: Tue Jul 19 15:38:51 2011
New Revision: 1148403

URL: http://svn.apache.org/viewvc?rev=1148403&view=rev
Log:
OPENNLP-234 Added abbreviation dictionary builder tool

Added:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java   (with props)
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java   (with props)
Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java?rev=1148403&r1=1148402&r2=1148403&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java Tue Jul 19 15:38:51 2011
@@ -30,6 +30,7 @@ import opennlp.tools.cmdline.chunker.Chu
 import opennlp.tools.cmdline.chunker.ChunkerEvaluatorTool;
 import opennlp.tools.cmdline.chunker.ChunkerMETool;
 import opennlp.tools.cmdline.chunker.ChunkerTrainerTool;
+import opennlp.tools.cmdline.dictionary.AbbreviationDictionaryBuilderTool;
 import opennlp.tools.cmdline.doccat.DoccatConverterTool;
 import opennlp.tools.cmdline.doccat.DoccatTool;
 import opennlp.tools.cmdline.doccat.DoccatTrainerTool;
@@ -77,6 +78,9 @@ public final class CLI {
     tools.add(new DoccatTrainerTool());
     tools.add(new DoccatConverterTool());
     
+    // Abbreviation Dictionary
+    tools.add(new AbbreviationDictionaryBuilderTool());
+    
     // Tokenizer
     tools.add(new SimpleTokenizerTool());
     tools.add(new TokenizerMETool());

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java?rev=1148403&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java Tue Jul 19 15:38:51 2011
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.dictionary;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.nio.charset.Charset;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CLI;
+import opennlp.tools.cmdline.CmdLineTool;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.dictionary.AbbreviationDictionary;
+
+/**
+ * Command line tool which reads a plain text file and writes its content as
+ * a serialized {@link AbbreviationDictionary}.
+ */
+public class AbbreviationDictionaryBuilderTool implements CmdLineTool {
+
+  /** Arguments of this tool; adds nothing to the shared builder parameters. */
+  interface Params extends DictionaryBuilderParams {
+
+  }
+
+  public String getName() {
+    return "AbbDictBuilder";
+  }
+
+  public String getShortDescription() {
+    return "builds a new abbreviation dictionary";
+  }
+
+  public String getHelp() {
+    return "Usage: " + CLI.CMD + " " + getName() + " "
+        + ArgumentParser.createUsage(Params.class);
+  }
+
+  /**
+   * Parses the input file with
+   * {@link AbbreviationDictionary#parseOneEntryPerLine} and serializes the
+   * resulting dictionary to the output file.
+   *
+   * @param args the command line arguments described by {@link Params}
+   * @throws TerminateToolException with value 1 if the arguments are invalid,
+   *           with value -1 if an IOException occurs during the conversion
+   */
+  public void run(String[] args) {
+    if (!ArgumentParser.validateArguments(args, Params.class)) {
+      System.err.println(getHelp());
+      throw new TerminateToolException(1);
+    }
+
+    Params params = ArgumentParser.parse(args, Params.class);
+
+    File dictInFile = params.getInputFile();
+    File dictOutFile = params.getOutputFile();
+    Charset encoding = params.getEncoding();
+
+    CmdLineUtil
+        .checkInputFile("abbreviation dictionary input file", dictInFile);
+    CmdLineUtil.checkOutputFile("abbreviation dictionary output file",
+        dictOutFile);
+
+    InputStreamReader in = null;
+    OutputStream out = null;
+    try {
+      in = new InputStreamReader(new FileInputStream(dictInFile), encoding);
+      out = new FileOutputStream(dictOutFile);
+
+      AbbreviationDictionary dict = AbbreviationDictionary
+          .parseOneEntryPerLine(in);
+      dict.serialize(out);
+
+    } catch (IOException e) {
+      CmdLineUtil.printTrainingIoError(e);
+      throw new TerminateToolException(-1);
+    } finally {
+      // Opening either stream can fail, which leaves it null. Close them
+      // independently so that neither a null reference nor a failing close
+      // of the reader prevents the output stream from being closed.
+      closeQuietly(in);
+      closeQuietly(out);
+    }
+  }
+
+  /**
+   * Closes the given stream if it was opened; an IOException thrown by the
+   * close is deliberately ignored (best-effort cleanup).
+   *
+   * @param stream the stream to close, may be null
+   */
+  private static void closeQuietly(java.io.Closeable stream) {
+    if (stream == null) {
+      return;
+    }
+
+    try {
+      stream.close();
+    } catch (IOException e) {
+      // sorry that this can fail
+    }
+  }
+
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/AbbreviationDictionaryBuilderTool.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java?rev=1148403&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java Tue Jul 19 15:38:51 2011
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.cmdline.dictionary;
+
+import java.io.File;
+
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.EncodingParameter;
+
+
+/**
+ * Parameters for the dictionary builder command line tools: the plain text
+ * input file, the dictionary output file and the encoding inherited from
+ * {@link EncodingParameter}. Internal use only, do not use this interface!
+ */
+interface DictionaryBuilderParams extends EncodingParameter {
+  
+  @ParameterDescription(valueName = "in", description = "Plain file with one entry per line")
+  File getInputFile();
+  
+  @ParameterDescription(valueName = "out", description = "The dictionary file.")
+  File getOutputFile();
+  
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/dictionary/DictionaryBuilderParams.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain