You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:39 UTC

[23/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/zmert/ZMERT.java
----------------------------------------------------------------------
diff --git a/src/joshua/zmert/ZMERT.java b/src/joshua/zmert/ZMERT.java
deleted file mode 100644
index 45f79db..0000000
--- a/src/joshua/zmert/ZMERT.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.zmert;
-
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-
-import joshua.decoder.JoshuaConfiguration;
-import joshua.util.FileUtility;
-
-public class ZMERT {
-  public static void main(String[] args) throws Exception {
-    boolean external = false; // should each MERT iteration be launched externally?
-
-    if (args.length == 1) {
-      if (args[0].equals("-h")) {
-        printZMERTUsage(args.length, true);
-        System.exit(2);
-      } else {
-        external = false;
-      }
-    } else if (args.length == 3) {
-      external = true;
-    } else {
-      printZMERTUsage(args.length, false);
-      System.exit(1);
-    }
-
-    if (!external) {
-      JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
-      MertCore myMert = new MertCore(args[0],joshuaConfiguration);
-      myMert.run_MERT(); // optimize lambda[]!!!
-      myMert.finish();
-    } else {
-      int maxMem = Integer.parseInt(args[1]);
-      String configFileName = args[2];
-      String stateFileName = FileUtility.dirname(configFileName) + "/ZMERT.temp.state";
-      String cp = System.getProperty("java.class.path");
-      boolean done = false;
-      int iteration = 0;
-      while (!done) {
-        ++iteration;
-        Runtime rt = Runtime.getRuntime();
-        Process p =
-            rt.exec("java -Xmx" + maxMem + "m -cp " + cp + " joshua.zmert.MertCore "
-                + configFileName + " " + stateFileName + " " + iteration);
-        BufferedReader br_i = new BufferedReader(new InputStreamReader(p.getInputStream()));
-        BufferedReader br_e = new BufferedReader(new InputStreamReader(p.getErrorStream()));
-        String dummy_line = null;
-        while ((dummy_line = br_i.readLine()) != null) {
-          System.out.println(dummy_line);
-        }
-        while ((dummy_line = br_e.readLine()) != null) {
-          System.out.println(dummy_line);
-        }
-        int status = p.waitFor();
-
-        if (status == 90) {
-          done = true;
-        } else if (status == 91) {
-          done = false;
-        } else {
-          System.out.println("Z-MERT exiting prematurely (MertCore returned " + status + ")...");
-          System.exit(status);
-        }
-      }
-    }
-
-    System.exit(0);
-
-  } // main(String[] args)
-
-  public static void printZMERTUsage(int argsLen, boolean detailed) {
-    if (!detailed) {
-      println("Oops, you provided " + argsLen + " args!");
-      println("");
-      println("Usage:");
-      println("           ZMERT -maxMem maxMemoryInMB MERT_configFile");
-      println("");
-      println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
-      println("allowed to use when performing its calculations (no memroy is needed while");
-      println("the decoder is running),");
-      println("and the config file contains any subset of Z-MERT's 20-some parameters,");
-      println("one per line.  Run   ZMERT -h   for more details on those parameters.");
-    } else {
-      println("Usage:");
-      println("           ZMERT -maxMem maxMemoryInMB MERT_configFile");
-      println("");
-      println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
-      println("allowed to use when performing its calculations (no memroy is needed while");
-      println("the decoder is running),");
-      println("and the config file contains any subset of Z-MERT's 20-some parameters,");
-      println("one per line.  Those parameters, and their default values, are:");
-      println("");
-      println("Relevant files:");
-      println("  -dir dirPrefix: working directory\n    [[default: null string (i.e. they are in the current directory)]]");
-      println("  -s sourceFile: source sentences (foreign sentences) of the MERT dataset\n    [[default: null string (i.e. file name is not needed by MERT)]]");
-      println("  -r refFile: target sentences (reference translations) of the MERT dataset\n    [[default: reference.txt]]");
-      println("  -rps refsPerSen: number of reference translations per sentence\n    [[default: 1]]");
-      println("  -txtNrm textNormMethod: how should text be normalized?\n       (0) don't normalize text,\n    or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n    or (2) apply 1 and also rejoin dashes between letters,\n    or (3) apply 1 and also drop non-ASCII characters,\n    or (4) apply 1+2+3\n    [[default: 1]]");
-      println("  -p paramsFile: file containing parameter names, initial values, and ranges\n    [[default: params.txt]]");
-      println("  -docInfo documentInfoFile: file informing Z-MERT which document each\n    sentence belongs to\n    [[default: null string (i.e. all sentences are in one 'document')]]");
-      println("  -fin finalLambda: file name for final lambda[] values\n    [[default: null string (i.e. no such file will be created)]]");
-      println("");
-      println("MERT specs:");
-      println("  -m metricName metric options: name of evaluation metric and its options\n    [[default: BLEU 4 closest]]");
-      println("  -maxIt maxMERTIts: maximum number of MERT iterations\n    [[default: 20]]");
-      println("  -prevIt prevMERTIts: maximum number of previous MERT iterations to\n    construct candidate sets from\n    [[default: 20]]");
-      println("  -minIt minMERTIts: number of iterations before considering an early exit\n    [[default: 5]]");
-      println("  -stopIt stopMinIts: some early stopping criterion must be satisfied in\n    stopMinIts *consecutive* iterations before an early exit\n    [[default: 3]]");
-      println("  -stopSig sigValue: early MERT exit if no weight changes by more than sigValue\n    [[default: -1 (i.e. this criterion is never investigated)]]");
-      println("  -thrCnt threadCount: number of threads to run in parallel when optimizing\n    [[default: 1]]");
-      println("  -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n    or both (3) or neither (0)\n    [[default: 3]]");
-      println("  -compress compressFiles: should Z-MERT compress the files it produces (1)\n    or not (0)\n    [[default: 0]]");
-      println("  -ipi initsPerIt: number of intermediate initial points per iteration\n    [[default: 20]]");
-      println("  -opi oncePerIt: modify a parameter only once per iteration (1) or not (0)\n    [[default: 0]]");
-      println("  -rand randInit: choose initial point randomly (1) or from paramsFile (0)\n    [[default: 0]]");
-      println("  -seed seed: seed used to initialize random number generator\n    [[default: time (i.e. value returned by System.currentTimeMillis()]]");
-      // println("  -ud useDisk: reliance on disk (0-2; higher value => more reliance)\n    [[default: 2]]");
-      println("");
-      println("Decoder specs:");
-      println("  -cmd commandFile: name of file containing commands to run the decoder\n    [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
-      println("  -passIt passIterationToDecoder: should iteration number be passed\n    to command file (1) or not (0)\n    [[default: 0]]");
-      println("  -decOut decoderOutFile: name of the output file produced by the decoder\n    [[default: output.nbest]]");
-      println("  -decExit validExit: value returned by decoder to indicate success\n    [[default: 0]]");
-      println("  -dcfg decConfigFile: name of decoder config file\n    [[default: dec_cfg.txt]]");
-      println("  -N N: size of N-best list (per sentence) generated in each MERT iteration\n    [[default: 100]]");
-      println("");
-      println("Output specs:");
-      println("  -v verbosity: Z-MERT verbosity level (0-2; higher value => more verbose)\n    [[default: 1]]");
-      println("  -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n    [[default: 0]]");
-      println("");
-    }
-  }
-
-  private static void println(Object obj) {
-    System.out.println(obj);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/zmert/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/zmert/package.html b/src/joshua/zmert/package.html
deleted file mode 100644
index e3a0b2d..0000000
--- a/src/joshua/zmert/package.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
--->
-Provides code for performing minimum error rate training.
-
-
-<h2>Related Documentation</h2>
-
-<ul>
-  <li> Much of the code in this package is based on Och (2003).
-  <li> A deeper description of the algorithm is in Zaidan (2009).
-</ul>
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/adagrad/AdaGrad.java b/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
new file mode 100755
index 0000000..61e90ad
--- /dev/null
+++ b/src/main/java/org/apache/joshua/adagrad/AdaGrad.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.adagrad;
+
+import joshua.decoder.JoshuaConfiguration;
+import joshua.util.FileUtility;
+import joshua.util.StreamGobbler;
+
+/**
+ * Command-line entry point for AdaGrad tuning.
+ *
+ * <p>Given a single config file argument, tuning runs inside this JVM.  Given three
+ * arguments ({@code -maxMem maxMemoryInMB configFile}), every iteration is launched as a
+ * separate {@code java} process running {@code joshua.adagrad.AdaGradCore}.
+ */
+public class AdaGrad {
+  /**
+   * Selects the run mode from the argument count and drives the tuning.  Exit codes: 2
+   * after {@code -h}, 1 for a wrong argument count, 0 on completion, otherwise the
+   * unexpected status returned by a child process.
+   *
+   * @param args {@code -h}, or one config file name, or three values of which the second
+   *        is the child heap size in MB and the third is the config file name
+   * @throws Exception anything raised by the in-process run or by launching/awaiting a child
+   */
+  public static void main(String[] args) throws Exception {
+    JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+    boolean external = false; // should each AdaGrad iteration be launched externally?
+
+    if (args.length == 1) {
+      if (args[0].equals("-h")) {
+        printAdaGradUsage(args.length, true);
+        System.exit(2);
+      }
+    } else if (args.length == 3) {
+      external = true;
+    } else {
+      printAdaGradUsage(args.length, false);
+      System.exit(1);
+    }
+
+    if (!external) {
+      AdaGradCore myAdaGrad = new AdaGradCore(args[0], joshuaConfiguration);
+      myAdaGrad.run_AdaGrad(); // optimize lambda[]
+      myAdaGrad.finish();
+    } else {
+      int maxMem = Integer.parseInt(args[1]); // args[0] (the -maxMem flag) is never inspected
+      String configFileName = args[2];
+      String stateFileName = FileUtility.dirname(configFileName) + "/AdaGrad.temp.state";
+      String cp = System.getProperty("java.class.path");
+      boolean done = false;
+      int iteration = 0;
+      while (!done) {
+        ++iteration;
+        // Hand the command over as an argument array: Runtime.exec(String) splits its
+        // argument on whitespace, which breaks class paths and file names with spaces.
+        String[] command = {"java", "-Xmx" + maxMem + "m", "-cp", cp,
+            "joshua.adagrad.AdaGradCore", configFileName, stateFileName,
+            Integer.toString(iteration)};
+        Process p = Runtime.getRuntime().exec(command);
+        // Presumably each gobbler relays one child stream on its own thread, so the child
+        // cannot stall on a full pipe while we wait -- confirm against StreamGobbler.
+        StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), 1);
+        StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), 1);
+        errorGobbler.start();
+        outputGobbler.start();
+        int status = p.waitFor();
+        if (status == 90) {
+          done = true; // 90: stop iterating
+        } else if (status != 91) { // 91: run another iteration; anything else is a failure
+          System.out.println("AdaGrad exiting prematurely (AdaGradCore returned " + status + ")...");
+          System.exit(status); // report the child's status; exiting 0 would mask the failure
+        }
+      }
+    }
+
+    System.exit(0);
+  } // main(String[] args)
+
+  /**
+   * Prints usage help to standard output.
+   *
+   * @param argsLen number of arguments the user supplied; echoed only in the brief form
+   * @param detailed {@code true} to list the config parameters with their defaults,
+   *        {@code false} for the brief wrong-argument-count message
+   */
+  public static void printAdaGradUsage(int argsLen, boolean detailed) {
+    if (!detailed) {
+      println("Oops, you provided " + argsLen + " args!");
+      println("");
+    }
+    // The synopsis is shared by the brief and the detailed help.
+    println("Usage:");
+    println("           AdaGrad -maxMem maxMemoryInMB AdaGrad_configFile");
+    println("");
+    println("Where -maxMem specifies the maximum amount of memory (in MB) AdaGrad is");
+    println("allowed to use when performing its calculations (no memory is needed while");
+    println("the decoder is running),");
+    println("and the config file contains any subset of AdaGrad's 20-some parameters,");
+    if (!detailed) {
+      println("one per line.  Run   AdaGrad -h   for more details on those parameters.");
+      return;
+    }
+    println("one per line.  Those parameters, and their default values, are:");
+    println("");
+    println("Relevant files:");
+    println("  -dir dirPrefix: working directory\n    [[default: null string (i.e. they are in the current directory)]]");
+    println("  -s sourceFile: source sentences (foreign sentences) of the AdaGrad dataset\n    [[default: null string (i.e. file name is not needed by AdaGrad)]]");
+    println("  -r refFile: target sentences (reference translations) of the AdaGrad dataset\n    [[default: reference.txt]]");
+    println("  -rps refsPerSen: number of reference translations per sentence\n    [[default: 1]]");
+    println("  -p paramsFile: file containing parameter names, initial values, and ranges\n    [[default: params.txt]]");
+    println("  -fin finalLambda: file name for final lambda[] values\n    [[default: null string (i.e. no such file will be created)]]");
+    println("");
+    println("AdaGrad specs:");
+    println("  -m metricName metric options: name of evaluation metric and its options\n    [[default: BLEU 4 closest]]");
+    println("  -maxIt maxAdaGradIts: maximum number of AdaGrad iterations\n    [[default: 20]]");
+    println("  -prevIt prevAdaGradIts: maximum number of previous AdaGrad iterations to\n    construct candidate sets from\n    [[default: 20]]");
+    println("  -minIt minAdaGradIts: number of iterations before considering an early exit\n    [[default: 5]]");
+    println("  -stopIt stopMinIts: some early stopping criterion must be satisfied in\n    stopMinIts *consecutive* iterations before an early exit\n    [[default: 3]]");
+    println("  -stopSig sigValue: early AdaGrad exit if no weight changes by more than sigValue\n    [[default: -1 (i.e. this criterion is never investigated)]]");
+    println("  -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n    or both (3) or neither (0)\n    [[default: 3]]");
+    println("  -compress compressFiles: should AdaGrad compress the files it produces (1)\n    or not (0)\n    [[default: 0]]");
+    // No help is printed for -txtNrm, -docInfo, -thrCnt, -ipi, -opi, -rand, -seed or -ud.
+    // NOTE(review): presumably AdaGradCore does not use them -- confirm.
+    println("");
+    println("Decoder specs:");
+    println("  -cmd commandFile: name of file containing commands to run the decoder\n    [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
+    println("  -passIt passIterationToDecoder: should iteration number be passed\n    to command file (1) or not (0)\n    [[default: 0]]");
+    println("  -decOut decoderOutFile: name of the output file produced by the decoder\n    [[default: output.nbest]]");
+    println("  -decExit validExit: value returned by decoder to indicate success\n    [[default: 0]]");
+    println("  -dcfg decConfigFile: name of decoder config file\n    [[default: dec_cfg.txt]]");
+    println("  -N N: size of N-best list (per sentence) generated in each AdaGrad iteration\n    [[default: 100]]");
+    println("");
+    println("Output specs:");
+    println("  -v verbosity: AdaGrad verbosity level (0-2; higher value => more verbose)\n    [[default: 1]]");
+    println("  -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n    [[default: 0]]");
+    println("");
+  }
+
+  // Shorthand for System.out.println, used by the usage text above.
+  private static void println(Object obj) {
+    System.out.println(obj);
+  }
+}