You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:48 UTC

[32/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/pro/PROCore.java
----------------------------------------------------------------------
diff --git a/src/joshua/pro/PROCore.java b/src/joshua/pro/PROCore.java
deleted file mode 100755
index 9e0a09a..0000000
--- a/src/joshua/pro/PROCore.java
+++ /dev/null
@@ -1,3106 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.pro;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.text.DecimalFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Random;
-import java.util.Scanner;
-import java.util.TreeSet;
-import java.util.Vector;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.metrics.EvaluationMetric;
-import joshua.util.StreamGobbler;
-import joshua.corpus.Vocabulary;
-
-/**
- * This code was originally written by Yuan Cao, who copied the MERT code to produce this file.
- */
-
-public class PROCore {
-  private final JoshuaConfiguration joshuaConfiguration;
-  private TreeSet<Integer>[] indicesOfInterest_all;
-
-  private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
-  private final Runtime myRuntime = Runtime.getRuntime();
-
-  private final static double NegInf = (-1.0 / 0.0);
-  private final static double PosInf = (+1.0 / 0.0);
-  private final static double epsilon = 1.0 / 1000000;
-
-  private int progress;
-
-  private int verbosity; // anything of priority <= verbosity will be printed
-                         // (lower value for priority means more important)
-
-  private Random randGen;
-  private int generatedRands;
-
-  private int numSentences;
-  // number of sentences in the dev set
-  // (aka the "MERT training" set)
-
-  private int numDocuments;
-  // number of documents in the dev set
-  // this should be 1, unless doing doc-level optimization
-
-  private int[] docOfSentence;
-  // docOfSentence[i] stores which document contains the i'th sentence.
-  // docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
-
-  private int[] docSubsetInfo;
-  // stores information regarding which subset of the documents are evaluated
-  // [0]: method (0-6)
-  // [1]: first (1-indexed)
-  // [2]: last (1-indexed)
-  // [3]: size
-  // [4]: center
-  // [5]: arg1
-  // [6]: arg2
-  // [1-6] are 0 for method 0, [6] is 0 for methods 1-4 as well
-  // only [1] and [2] are needed for optimization. The rest are only needed for an output message.
-
-  private int refsPerSen;
-  // number of reference translations per sentence
-
-  private int textNormMethod;
-  // 0: no normalization, 1: "NIST-style" tokenization, and also rejoin 'm, 're, *'s, 've, 'll, 'd,
-  // and n't,
-  // 2: apply 1 and also rejoin dashes between letters, 3: apply 1 and also drop non-ASCII
-  // characters
-  // 4: apply 1+2+3
-
-  private int numParams;
-  // total number of firing features
-  // this number may increase overtime as new n-best lists are decoded
-  // initially it is equal to the # of params in the parameter config file
-  private int numParamsOld;
-  // number of features before observing the new features fired in the current iteration
-
-  private double[] normalizationOptions;
-  // How should a lambda[] vector be normalized (before decoding)?
-  // nO[0] = 0: no normalization
-  // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
-  // nO[0] = 2: scale so that the maximum absolute value is nO[1]
-  // nO[0] = 3: scale so that the minimum absolute value is nO[1]
-  // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
-
-  /* *********************************************************** */
-  /* NOTE: indexing starts at 1 in the following few arrays: */
-  /* *********************************************************** */
-
-  // private double[] lambda;
-  private ArrayList<Double> lambda = new ArrayList<Double>();
-  // the current weight vector. NOTE: indexing starts at 1.
-  private ArrayList<Double> bestLambda = new ArrayList<Double>();
-  // the best weight vector across all iterations
-
-  private boolean[] isOptimizable;
-  // isOptimizable[c] = true iff lambda[c] should be optimized
-
-  private double[] minRandValue;
-  private double[] maxRandValue;
-  // when choosing a random value for the lambda[c] parameter, it will be
-  // chosen from the [minRandValue[c],maxRandValue[c]] range.
-  // (*) minRandValue and maxRandValue must be real values, but not -Inf or +Inf
-
-  private double[] defaultLambda;
-  // "default" parameter values; simply the values read in the parameter file
-  // USED FOR NON-OPTIMIZABLE (FIXED) FEATURES
-
-  /* *********************************************************** */
-  /* *********************************************************** */
-
-  private Decoder myDecoder;
-  // COMMENT OUT if decoder is not Joshua
-
-  private String decoderCommand;
-  // the command that runs the decoder; read from decoderCommandFileName
-
-  private int decVerbosity;
-  // verbosity level for decoder output. If 0, decoder output is ignored.
-  // If 1, decoder output is printed.
-
-  private int validDecoderExitValue;
-  // return value from running the decoder command that indicates success
-
-  private int numOptThreads;
-  // number of threads to run things in parallel
-
-  private int saveInterFiles;
-  // 0: nothing, 1: only configs, 2: only n-bests, 3: both configs and n-bests
-
-  private int compressFiles;
-  // should PRO gzip the large files? If 0, no compression takes place.
-  // If 1, compression is performed on: decoder output files, temp sents files,
-  // and temp feats files.
-
-  private int sizeOfNBest;
-  // size of N-best list generated by decoder at each iteration
-  // (aka simply N, but N is a bad variable name)
-
-  private long seed;
-  // seed used to create random number generators
-
-  private boolean randInit;
-  // if true, parameters are initialized randomly. If false, parameters
-  // are initialized using values from parameter file.
-
-  private int maxMERTIterations, minMERTIterations, prevMERTIterations;
-  // max: maximum number of MERT iterations
-  // min: minimum number of MERT iterations before an early MERT exit
-  // prev: number of previous MERT iterations from which to consider candidates (in addition to
-  // the candidates from the current iteration)
-
-  private double stopSigValue;
-  // early MERT exit if no weight changes by more than stopSigValue
-  // (but see minMERTIterations above and stopMinIts below)
-
-  private int stopMinIts;
-  // some early stopping criterion must be satisfied in stopMinIts *consecutive* iterations
-  // before an early exit (but see minMERTIterations above)
-
-  private boolean oneModificationPerIteration;
-  // if true, each MERT iteration performs at most one parameter modification.
-  // If false, a new MERT iteration starts (i.e. a new N-best list is
-  // generated) only after the previous iteration reaches a local maximum.
-
-  private String metricName;
-  // name of evaluation metric optimized by MERT
-
-  private String metricName_display;
-  // name of evaluation metric optimized by MERT, possibly with "doc-level " prefixed
-
-  private String[] metricOptions;
-  // options for the evaluation metric (e.g. for BLEU, maxGramLength and effLengthMethod)
-
-  private EvaluationMetric evalMetric;
-  // the evaluation metric used by MERT
-
-  private int suffStatsCount;
-  // number of sufficient statistics for the evaluation metric
-
-  private String tmpDirPrefix;
-  // prefix for the PRO.temp.* files
-
-  private boolean passIterationToDecoder;
-  // should the iteration number be passed as an argument to decoderCommandFileName?
-
-  // used for pro
-  private String classifierAlg; // the classification algorithm(percep, megam, maxent ...)
-  private String[] classifierParams = null; // the param array for each classifier
-  private int Tau;
-  private int Xi;
-  private double interCoef;
-  private double metricDiff;
-  private double prevMetricScore = 0; // final metric score of the previous iteration, used only
-                                      // when returnBest = true
-  private boolean returnBest = false; // return the best weight during tuning
-
-  private String dirPrefix; // where are all these files located?
-  private String paramsFileName, docInfoFileName, finalLambdaFileName;
-  private String sourceFileName, refFileName, decoderOutFileName;
-  private String decoderConfigFileName, decoderCommandFileName;
-  private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
-
-  // e.g. output.it[1-x].someOldRun would be specified as:
-  // output.it?.someOldRun
-  // and we'd have prefix = "output.it" and suffix = ".sameOldRun"
-
-  // private int useDisk;
-
-  public PROCore(JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
-  }
-
-  public PROCore(String[] args, JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
-    EvaluationMetric.set_knownMetrics();
-    processArgsArray(args);
-    initialize(0);
-  }
-
-  public PROCore(String configFileName, JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
-    EvaluationMetric.set_knownMetrics();
-    processArgsArray(cfgFileToArgsArray(configFileName));
-    initialize(0);
-  }
-
-  private void initialize(int randsToSkip) {
-    println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon, 4);
-
-    randGen = new Random(seed);
-    for (int r = 1; r <= randsToSkip; ++r) {
-      randGen.nextDouble();
-    }
-    generatedRands = randsToSkip;
-
-    if (randsToSkip == 0) {
-      println("----------------------------------------------------", 1);
-      println("Initializing...", 1);
-      println("----------------------------------------------------", 1);
-      println("", 1);
-
-      println("Random number generator initialized using seed: " + seed, 1);
-      println("", 1);
-    }
-
-    // COUNT THE TOTAL NUM OF SENTENCES TO BE DECODED, refFileName IS THE COMBINED REFERENCE FILE
-    // NAME(AUTO GENERATED)
-    numSentences = countLines(refFileName) / refsPerSen;
-
-    // ??
-    processDocInfo();
-    // sets numDocuments and docOfSentence[]
-
-    if (numDocuments > 1)
-      metricName_display = "doc-level " + metricName;
-
-    // ??
-    set_docSubsetInfo(docSubsetInfo);
-
-    // count the number of initial features
-    numParams = countNonEmptyLines(paramsFileName) - 1;
-    numParamsOld = numParams;
-
-    // read parameter config file
-    try {
-      // read dense parameter names
-      BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
-
-      for (int c = 1; c <= numParams; ++c) {
-        String line = "";
-        while (line != null && line.length() == 0) { // skip empty lines
-          line = inFile_names.readLine();
-        }
-
-        // save feature names
-        String paramName = (line.substring(0, line.indexOf("|||"))).trim();
-        Vocabulary.id(paramName);
-        // System.err.println(String.format("VOCAB(%s) = %d", paramName, id));
-      }
-
-      inFile_names.close();
-    } catch (FileNotFoundException e) {
-      System.err.println("FileNotFoundException in PROCore.initialize(int): " + e.getMessage());
-      System.exit(99901);
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.initialize(int): " + e.getMessage());
-      System.exit(99902);
-    }
-
-    // the parameter file contains one line per parameter
-    // and one line for the normalization method
-    // indexing starts at 1 in these arrays
-    for (int p = 0; p <= numParams; ++p)
-      lambda.add(new Double(0));
-    bestLambda.add(new Double(0));
-    // why only lambda is a list? because the size of lambda
-    // may increase over time, but other arrays are specified in
-    // the param config file, only used for initialization
-    isOptimizable = new boolean[1 + numParams];
-    minRandValue = new double[1 + numParams];
-    maxRandValue = new double[1 + numParams];
-    defaultLambda = new double[1 + numParams];
-    normalizationOptions = new double[3];
-
-    // read initial param values
-    processParamFile();
-    // sets the arrays declared just above
-
-    // SentenceInfo.createV(); // uncomment ONLY IF using vocabulary implementation of SentenceInfo
-
-    String[][] refSentences = new String[numSentences][refsPerSen];
-
-    try {
-
-      // read in reference sentences
-      InputStream inStream_refs = new FileInputStream(new File(refFileName));
-      BufferedReader inFile_refs = new BufferedReader(new InputStreamReader(inStream_refs, "utf8"));
-
-      for (int i = 0; i < numSentences; ++i) {
-        for (int r = 0; r < refsPerSen; ++r) {
-          // read the rth reference translation for the ith sentence
-          refSentences[i][r] = inFile_refs.readLine();
-        }
-      }
-
-      inFile_refs.close();
-
-      // normalize reference sentences
-      for (int i = 0; i < numSentences; ++i) {
-        for (int r = 0; r < refsPerSen; ++r) {
-          // normalize the rth reference translation for the ith sentence
-          refSentences[i][r] = normalize(refSentences[i][r], textNormMethod);
-        }
-      }
-
-      // read in decoder command, if any
-      decoderCommand = null;
-      if (decoderCommandFileName != null) {
-        if (fileExists(decoderCommandFileName)) {
-          BufferedReader inFile_comm = new BufferedReader(new FileReader(decoderCommandFileName));
-          decoderCommand = inFile_comm.readLine(); // READ IN DECODE COMMAND
-          inFile_comm.close();
-        }
-      }
-    } catch (FileNotFoundException e) {
-      System.err.println("FileNotFoundException in PROCore.initialize(int): " + e.getMessage());
-      System.exit(99901);
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.initialize(int): " + e.getMessage());
-      System.exit(99902);
-    }
-
-    // set static data members for the EvaluationMetric class
-    EvaluationMetric.set_numSentences(numSentences);
-    EvaluationMetric.set_numDocuments(numDocuments);
-    EvaluationMetric.set_refsPerSen(refsPerSen);
-    EvaluationMetric.set_refSentences(refSentences);
-    EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
-
-    evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);
-    // used only if returnBest = true
-    prevMetricScore = evalMetric.getToBeMinimized() ? PosInf : NegInf;
-
-    // length of sufficient statistics
-    // for bleu: suffstatscount=8 (2*ngram+2)
-    suffStatsCount = evalMetric.get_suffStatsCount();
-
-    // set static data members for the IntermediateOptimizer class
-    /*
-     * IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence,
-     * docSubsetInfo, numParams, normalizationOptions, isOptimizable oneModificationPerIteration,
-     * evalMetric, tmpDirPrefix, verbosity);
-     */
-
-    // print info
-    if (randsToSkip == 0) { // i.e. first iteration
-      println("Number of sentences: " + numSentences, 1);
-      println("Number of documents: " + numDocuments, 1);
-      println("Optimizing " + metricName_display, 1);
-
-      /*
-       * print("docSubsetInfo: {", 1); for (int f = 0; f < 6; ++f) print(docSubsetInfo[f] + ", ",
-       * 1); println(docSubsetInfo[6] + "}", 1);
-       */
-
-      println("Number of initial features: " + numParams, 1);
-      print("Initial feature names: {", 1);
-
-      for (int c = 1; c <= numParams; ++c)
-        print("\"" + Vocabulary.word(c) + "\"", 1);
-      println("}", 1);
-      println("", 1);
-
-      // TODO just print the correct info
-      println("c    Default value\tOptimizable?\tRand. val. range", 1);
-
-      for (int c = 1; c <= numParams; ++c) {
-        print(c + "     " + f4.format(lambda.get(c).doubleValue()) + "\t\t", 1);
-
-        if (!isOptimizable[c]) {
-          println(" No", 1);
-        } else {
-          print(" Yes\t\t", 1);
-          print(" [" + minRandValue[c] + "," + maxRandValue[c] + "]", 1);
-          println("", 1);
-        }
-      }
-
-      println("", 1);
-      print("Weight vector normalization method: ", 1);
-      if (normalizationOptions[0] == 0) {
-        println("none.", 1);
-      } else if (normalizationOptions[0] == 1) {
-        println(
-            "weights will be scaled so that the \""
-                + Vocabulary.word((int) normalizationOptions[2])
-                + "\" weight has an absolute value of " + normalizationOptions[1] + ".", 1);
-      } else if (normalizationOptions[0] == 2) {
-        println("weights will be scaled so that the maximum absolute value is "
-            + normalizationOptions[1] + ".", 1);
-      } else if (normalizationOptions[0] == 3) {
-        println("weights will be scaled so that the minimum absolute value is "
-            + normalizationOptions[1] + ".", 1);
-      } else if (normalizationOptions[0] == 4) {
-        println("weights will be scaled so that the L-" + normalizationOptions[1] + " norm is "
-            + normalizationOptions[2] + ".", 1);
-      }
-
-      println("", 1);
-
-      println("----------------------------------------------------", 1);
-      println("", 1);
-
-      // rename original config file so it doesn't get overwritten
-      // (original name will be restored in finish())
-      renameFile(decoderConfigFileName, decoderConfigFileName + ".PRO.orig");
-    } // if (randsToSkip == 0)
-
-    // by default, load joshua decoder
-    if (decoderCommand == null && fakeFileNameTemplate == null) {
-      println("Loading Joshua decoder...", 1);
-      myDecoder = new Decoder(joshuaConfiguration, decoderConfigFileName + ".PRO.orig");
-      println("...finished loading @ " + (new Date()), 1);
-      println("");
-    } else {
-      myDecoder = null;
-    }
-
-    @SuppressWarnings("unchecked")
-    TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
-    indicesOfInterest_all = temp_TSA;
-
-    for (int i = 0; i < numSentences; ++i) {
-      indicesOfInterest_all[i] = new TreeSet<Integer>();
-    }
-  } // void initialize(...)
-
-  // -------------------------
-
-  public void run_PRO() {
-    run_PRO(minMERTIterations, maxMERTIterations, prevMERTIterations);
-  }
-
-  public void run_PRO(int minIts, int maxIts, int prevIts) {
-    // FIRST, CLEAN ALL PREVIOUS TEMP FILES
-    String dir;
-    int k = tmpDirPrefix.lastIndexOf("/");
-    if (k >= 0) {
-      dir = tmpDirPrefix.substring(0, k + 1);
-    } else {
-      dir = "./";
-    }
-    String files;
-    File folder = new File(dir);
-
-    if (folder.exists()) {
-      File[] listOfFiles = folder.listFiles();
-
-      for (int i = 0; i < listOfFiles.length; i++) {
-        if (listOfFiles[i].isFile()) {
-          files = listOfFiles[i].getName();
-          if (files.startsWith("PRO.temp")) {
-            deleteFile(files);
-          }
-        }
-      }
-    }
-
-    println("----------------------------------------------------", 1);
-    println("PRO run started @ " + (new Date()), 1);
-    // printMemoryUsage();
-    println("----------------------------------------------------", 1);
-    println("", 1);
-
-    // if no default lambda is provided
-    if (randInit) {
-      println("Initializing lambda[] randomly.", 1);
-      // initialize optimizable parameters randomly (sampling uniformly from
-      // that parameter's random value range)
-      lambda = randomLambda();
-    }
-
-    println("Initial lambda[]: " + lambdaToString(lambda), 1);
-    println("", 1);
-
-    int[] maxIndex = new int[numSentences];
-
-    // HashMap<Integer,int[]>[] suffStats_array = new HashMap[numSentences];
-    // suffStats_array[i] maps candidates of interest for sentence i to an array
-    // storing the sufficient statistics for that candidate
-
-    int earlyStop = 0;
-    // number of consecutive iteration an early stopping criterion was satisfied
-
-    for (int iteration = 1;; ++iteration) {
-
-      // what does "A" contain?
-      // retA[0]: FINAL_score
-      // retA[1]: earlyStop
-      // retA[2]: should this be the last iteration?
-      double[] A = run_single_iteration(iteration, minIts, maxIts, prevIts, earlyStop, maxIndex);
-      if (A != null) {
-        earlyStop = (int) A[1];
-        if (A[2] == 1)
-          break;
-      } else {
-        break;
-      }
-
-    } // for (iteration)
-
-    println("", 1);
-
-    println("----------------------------------------------------", 1);
-    println("PRO run ended @ " + (new Date()), 1);
-    // printMemoryUsage();
-    println("----------------------------------------------------", 1);
-    println("", 1);
-
-    if (!returnBest)
-      println("FINAL lambda: " + lambdaToString(lambda), 1);
-    // + " (" + metricName_display + ": " + FINAL_score + ")",1);
-    else
-      println("BEST lambda: " + lambdaToString(lambda), 1);
-    // + " (" + metricName_display + ": " + FINAL_score + ")",1);
-
-    // delete intermediate .temp.*.it* decoder output files
-    for (int iteration = 1; iteration <= maxIts; ++iteration) {
-      if (compressFiles == 1) {
-        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration + ".gz");
-        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration + ".gz");
-        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz")) {
-          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz");
-        } else {
-          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz");
-        }
-      } else {
-        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration);
-        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration);
-        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy")) {
-          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy");
-        } else {
-          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration);
-        }
-      }
-    }
-  } // void run_PRO(int maxIts)
-
-  // this is the key function!
-  @SuppressWarnings("unchecked")
-  public double[] run_single_iteration(int iteration, int minIts, int maxIts, int prevIts,
-      int earlyStop, int[] maxIndex) {
-    double FINAL_score = 0;
-
-    double[] retA = new double[3];
-    // retA[0]: FINAL_score
-    // retA[1]: earlyStop
-    // retA[2]: should this be the last iteration?
-
-    boolean done = false;
-    retA[2] = 1; // will only be made 0 if we don't break from the following loop
-
-    // save feats and stats for all candidates(old & new)
-    HashMap<String, String>[] feat_hash = new HashMap[numSentences];
-    for (int i = 0; i < numSentences; i++)
-      feat_hash[i] = new HashMap<String, String>();
-
-    HashMap<String, String>[] stats_hash = new HashMap[numSentences];
-    for (int i = 0; i < numSentences; i++)
-      stats_hash[i] = new HashMap<String, String>();
-
-    while (!done) { // NOTE: this "loop" will only be carried out once
-      println("--- Starting PRO iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
-
-      // printMemoryUsage();
-
-      /******************************/
-      // CREATE DECODER CONFIG FILE //
-      /******************************/
-
-      createConfigFile(lambda, decoderConfigFileName, decoderConfigFileName + ".PRO.orig");
-      // i.e. use the original config file as a template
-
-      /***************/
-      // RUN DECODER //
-      /***************/
-
-      if (iteration == 1) {
-        println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
-      } else {
-        println("Redecoding using weight vector " + lambdaToString(lambda), 1);
-      }
-
-      // generate the n-best file after decoding
-      String[] decRunResult = run_decoder(iteration); // iteration passed in case fake decoder will
-                                                      // be used
-      // [0] name of file to be processed
-      // [1] indicates how the output file was obtained:
-      // 1: external decoder
-      // 2: fake decoder
-      // 3: internal decoder
-
-      if (!decRunResult[1].equals("2")) {
-        println("...finished decoding @ " + (new Date()), 1);
-      }
-
-      checkFile(decRunResult[0]);
-
-      /************* END OF DECODING **************/
-
-      println("Producing temp files for iteration " + iteration, 3);
-
-      produceTempFiles(decRunResult[0], iteration);
-
-      // save intermedidate output files
-      // save joshua.config.pro.it*
-      if (saveInterFiles == 1 || saveInterFiles == 3) { // make copy of intermediate config file
-        if (!copyFile(decoderConfigFileName, decoderConfigFileName + ".PRO.it" + iteration)) {
-          println("Warning: attempt to make copy of decoder config file (to create"
-              + decoderConfigFileName + ".PRO.it" + iteration + ") was unsuccessful!", 1);
-        }
-      }
-
-      // save output.nest.PRO.it*
-      if (saveInterFiles == 2 || saveInterFiles == 3) { // make copy of intermediate decoder output
-                                                        // file...
-
-        if (!decRunResult[1].equals("2")) { // ...but only if no fake decoder
-          if (!decRunResult[0].endsWith(".gz")) {
-            if (!copyFile(decRunResult[0], decRunResult[0] + ".PRO.it" + iteration)) {
-              println("Warning: attempt to make copy of decoder output file (to create"
-                  + decRunResult[0] + ".PRO.it" + iteration + ") was unsuccessful!", 1);
-            }
-          } else {
-            String prefix = decRunResult[0].substring(0, decRunResult[0].length() - 3);
-            if (!copyFile(prefix + ".gz", prefix + ".PRO.it" + iteration + ".gz")) {
-              println("Warning: attempt to make copy of decoder output file (to create" + prefix
-                  + ".PRO.it" + iteration + ".gz" + ") was unsuccessful!", 1);
-            }
-          }
-
-          if (compressFiles == 1 && !decRunResult[0].endsWith(".gz")) {
-            gzipFile(decRunResult[0] + ".PRO.it" + iteration);
-          }
-        } // if (!fake)
-      }
-
-      // ------------- end of saving .pro.it* files ---------------
-
-      int[] candCount = new int[numSentences];
-      int[] lastUsedIndex = new int[numSentences];
-
-      ConcurrentHashMap<Integer, int[]>[] suffStats_array = new ConcurrentHashMap[numSentences];
-      for (int i = 0; i < numSentences; ++i) {
-        candCount[i] = 0;
-        lastUsedIndex[i] = -1;
-        // suffStats_array[i].clear();
-        suffStats_array[i] = new ConcurrentHashMap<Integer, int[]>();
-      }
-
-      // initLambda[0] is not used!
-      double[] initialLambda = new double[1 + numParams];
-      for (int i = 1; i <= numParams; ++i)
-        initialLambda[i] = lambda.get(i);
-
-      // the "score" in initialScore refers to that
-      // assigned by the evaluation metric)
-
-      // you may consider all candidates from iter 1, or from iter (iteration-prevIts) to current
-      // iteration
-      int firstIt = Math.max(1, iteration - prevIts);
-      // i.e. only process candidates from the current iteration and candidates
-      // from up to prevIts previous iterations.
-      println("Reading candidate translations from iterations " + firstIt + "-" + iteration, 1);
-      println("(and computing " + metricName
-          + " sufficient statistics for previously unseen candidates)", 1);
-      print("  Progress: ");
-
-      int[] newCandidatesAdded = new int[1 + iteration];
-      for (int it = 1; it <= iteration; ++it)
-        newCandidatesAdded[it] = 0;
-
-      try {
-        // read temp files from all past iterations
-        // 3 types of temp files:
-        // 1. output hypo at iter i
-        // 2. feature value of each hypo at iter i
-        // 3. suff stats of each hypo at iter i
-
-        // each inFile corresponds to the output of an iteration
-        // (index 0 is not used; no corresponding index for the current iteration)
-        BufferedReader[] inFile_sents = new BufferedReader[iteration];
-        BufferedReader[] inFile_feats = new BufferedReader[iteration];
-        BufferedReader[] inFile_stats = new BufferedReader[iteration];
-
-        // temp file(array) from previous iterations
-        for (int it = firstIt; it < iteration; ++it) {
-          InputStream inStream_sents, inStream_feats, inStream_stats;
-          if (compressFiles == 0) {
-            inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
-            inStream_feats = new FileInputStream(tmpDirPrefix + "temp.feats.it" + it);
-            inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
-          } else {
-            inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it"
-                + it + ".gz"));
-            inStream_feats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.feats.it"
-                + it + ".gz"));
-            inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.stats.it"
-                + it + ".gz"));
-          }
-
-          inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
-          inFile_feats[it] = new BufferedReader(new InputStreamReader(inStream_feats, "utf8"));
-          inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
-        }
-
-        InputStream inStream_sentsCurrIt, inStream_featsCurrIt, inStream_statsCurrIt;
-        // temp file for current iteration!
-        if (compressFiles == 0) {
-          inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
-          inStream_featsCurrIt = new FileInputStream(tmpDirPrefix + "temp.feats.it" + iteration);
-        } else {
-          inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
-              + "temp.sents.it" + iteration + ".gz"));
-          inStream_featsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
-              + "temp.feats.it" + iteration + ".gz"));
-        }
-
-        BufferedReader inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(
-            inStream_sentsCurrIt, "utf8"));
-        BufferedReader inFile_featsCurrIt = new BufferedReader(new InputStreamReader(
-            inStream_featsCurrIt, "utf8"));
-
-        BufferedReader inFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below
-                                                  // is set to true
-        PrintWriter outFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below is
-                                                // set to false
-
-        // just to check if temp.stat.it.iteration exists
-        boolean statsCurrIt_exists = false;
-
-        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration)) {
-          inStream_statsCurrIt = new FileInputStream(tmpDirPrefix + "temp.stats.it" + iteration);
-          inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt,
-              "utf8"));
-          statsCurrIt_exists = true;
-          copyFile(tmpDirPrefix + "temp.stats.it" + iteration, tmpDirPrefix + "temp.stats.it"
-              + iteration + ".copy");
-        } else if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".gz")) {
-          inStream_statsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
-              + "temp.stats.it" + iteration + ".gz"));
-          inFile_statsCurrIt = new BufferedReader(new InputStreamReader(inStream_statsCurrIt,
-              "utf8"));
-          statsCurrIt_exists = true;
-          copyFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz", tmpDirPrefix
-              + "temp.stats.it" + iteration + ".copy.gz");
-        } else {
-          outFile_statsCurrIt = new PrintWriter(tmpDirPrefix + "temp.stats.it" + iteration);
-        }
-
-        // output the 4^th temp file: *.temp.stats.merged
-        PrintWriter outFile_statsMerged = new PrintWriter(tmpDirPrefix + "temp.stats.merged");
-        // write sufficient statistics from all the sentences
-        // from the output files into a single file
-        PrintWriter outFile_statsMergedKnown = new PrintWriter(tmpDirPrefix
-            + "temp.stats.mergedKnown");
-        // write sufficient statistics from all the sentences
-        // from the output files into a single file
-
-        // output the 5^th 6^th temp file, but will be deleted at the end of the function
-        FileOutputStream outStream_unknownCands = new FileOutputStream(tmpDirPrefix
-            + "temp.currIt.unknownCands", false);
-        OutputStreamWriter outStreamWriter_unknownCands = new OutputStreamWriter(
-            outStream_unknownCands, "utf8");
-        BufferedWriter outFile_unknownCands = new BufferedWriter(outStreamWriter_unknownCands);
-
-        PrintWriter outFile_unknownIndices = new PrintWriter(tmpDirPrefix
-            + "temp.currIt.unknownIndices");
-
-        String sents_str, feats_str, stats_str;
-
-        // BUG: this assumes a candidate string cannot be produced for two
-        // different source sentences, which is not necessarily true
-        // (It's not actually a bug, but only because existingCandStats gets
-        // cleared before moving to the next source sentence.)
-        // FIX: should be made an array, indexed by i
-        HashMap<String, String> existingCandStats = new HashMap<String, String>();
-        // VERY IMPORTANT:
-        // A CANDIDATE X MAY APPEARED IN ITER 1, ITER 3
-        // BUT IF THE USER SPECIFIED TO CONSIDER ITERATIONS FROM ONLY ITER 2, THEN
-        // X IS NOT A "REPEATED" CANDIDATE IN ITER 3. THEREFORE WE WANT TO KEEP THE
-        // SUFF STATS FOR EACH CANDIDATE(TO SAVE COMPUTATION IN THE FUTURE)
-
-        // Stores precalculated sufficient statistics for candidates, in case
-        // the same candidate is seen again. (SS stored as a String.)
-        // Q: Why do we care? If we see the same candidate again, aren't we going
-        // to ignore it? So, why do we care about the SS of this repeat candidate?
-        // A: A "repeat" candidate may not be a repeat candidate in later
-        // iterations if the user specifies a value for prevMERTIterations
-        // that causes MERT to skip candidates from early iterations.
-
-        String[] featVal_str;
-
-        int totalCandidateCount = 0;
-
-        // new candidate size for each sentence
-        int[] sizeUnknown_currIt = new int[numSentences];
-
-        for (int i = 0; i < numSentences; ++i) {
-          // process candidates from previous iterations
-          // low efficiency? for each iteration, it reads in all previous iteration outputs
-          // therefore a lot of overlapping jobs
-          // this is an easy implementation to deal with the situation in which user only specified
-          // "previt" and hopes to consider only the previous previt
-          // iterations, then for each iteration the existing candadites will be different
-          for (int it = firstIt; it < iteration; ++it) {
-            // Why up to but *excluding* iteration?
-            // Because the last iteration is handled a little differently, since
-            // the SS must be calculated (and the corresponding file created),
-            // which is not true for previous iterations.
-
-            for (int n = 0; n <= sizeOfNBest; ++n) {
-              // note that in all temp files, "||||||" is a separator between 2 n-best lists
-
-              // Why up to and *including* sizeOfNBest?
-              // So that it would read the "||||||" separator even if there is
-              // a complete list of sizeOfNBest candidates.
-
-              // for the nth candidate for the ith sentence, read the sentence, feature values,
-              // and sufficient statistics from the various temp files
-
-              // read one line of temp.sent, temp.feat, temp.stats from iteration it
-              sents_str = inFile_sents[it].readLine();
-              feats_str = inFile_feats[it].readLine();
-              stats_str = inFile_stats[it].readLine();
-
-              if (sents_str.equals("||||||")) {
-                n = sizeOfNBest + 1; // move on to the next n-best list
-              } else if (!existingCandStats.containsKey(sents_str)) // if this candidate does not
-                                                                    // exist
-              {
-                outFile_statsMergedKnown.println(stats_str);
-
-                // save feats & stats
-                feat_hash[i].put(sents_str, feats_str);
-                stats_hash[i].put(sents_str, stats_str);
-
-                // extract feature value
-                featVal_str = feats_str.split("\\s+");
-
-                if (feats_str.indexOf('=') != -1) {
-                  for (String featurePair : featVal_str) {
-                    String[] pair = featurePair.split("=");
-                    String name = pair[0];
-                    Double value = Double.parseDouble(pair[1]);
-                    int featId = Vocabulary.id(name);
-                    // need to identify newly fired feats here
-                    if (featId > numParams) {
-                      ++numParams;
-                      lambda.add(new Double(0));
-                    }
-                  }
-                }
-                existingCandStats.put(sents_str, stats_str);
-                candCount[i] += 1;
-                newCandidatesAdded[it] += 1;
-              } // if unseen candidate
-            } // for (n)
-          } // for (it)
-
-          outFile_statsMergedKnown.println("||||||");
-
-          // ---------- end of processing previous iterations ----------
-          // ---------- now start processing new candidates ----------
-
-          // now process the candidates of the current iteration
-          // now determine the new candidates of the current iteration
-
-          /*
-           * remember: BufferedReader inFile_sentsCurrIt BufferedReader inFile_featsCurrIt
-           * PrintWriter outFile_statsCurrIt
-           */
-
-          String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
-
-          Vector<String> unknownCands_V = new Vector<String>();
-          // which candidates (of the i'th source sentence) have not been seen before
-          // this iteration?
-
-          for (int n = 0; n <= sizeOfNBest; ++n) {
-            // Why up to and *including* sizeOfNBest?
-            // So that it would read the "||||||" separator even if there is
-            // a complete list of sizeOfNBest candidates.
-
-            // for the nth candidate for the ith sentence, read the sentence,
-            // and store it in the sentsCurrIt_currSrcSent array
-
-            sents_str = inFile_sentsCurrIt.readLine(); // read one candidate from the current
-                                                       // iteration
-            sentsCurrIt_currSrcSent[n] = sents_str; // Note: possibly "||||||"
-
-            if (sents_str.equals("||||||")) {
-              n = sizeOfNBest + 1;
-            } else if (!existingCandStats.containsKey(sents_str)) {
-              unknownCands_V.add(sents_str); // NEW CANDIDATE FROM THIS ITERATION
-              writeLine(sents_str, outFile_unknownCands);
-              outFile_unknownIndices.println(i); // INDEX OF THE NEW CANDIDATES
-              newCandidatesAdded[iteration] += 1;
-              existingCandStats.put(sents_str, "U"); // i.e. unknown
-              // we add sents_str to avoid duplicate entries in unknownCands_V
-            }
-          } // for (n)
-
-          // only compute suff stats for new candidates
-          // now unknownCands_V has the candidates for which we need to calculate
-          // sufficient statistics (for the i'th source sentence)
-          int sizeUnknown = unknownCands_V.size();
-          sizeUnknown_currIt[i] = sizeUnknown;
-
-          existingCandStats.clear();
-
-        } // for (i) each sentence
-
-        // ---------- end of merging candidates stats from previous iterations
-        // and finding new candidates ------------
-
-        /*
-         * int[][] newSuffStats = null; if (!statsCurrIt_exists && sizeUnknown > 0) { newSuffStats =
-         * evalMetric.suffStats(unknownCands, indices); }
-         */
-
-        outFile_statsMergedKnown.close();
-        outFile_unknownCands.close();
-        outFile_unknownIndices.close();
-
-        // want to re-open all temp files and start from scratch again?
-        for (int it = firstIt; it < iteration; ++it) // previous iterations temp files
-        {
-          inFile_sents[it].close();
-          inFile_stats[it].close();
-
-          InputStream inStream_sents, inStream_stats;
-          if (compressFiles == 0) {
-            inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
-            inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
-          } else {
-            inStream_sents = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it"
-                + it + ".gz"));
-            inStream_stats = new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.stats.it"
-                + it + ".gz"));
-          }
-
-          inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
-          inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
-        }
-
-        inFile_sentsCurrIt.close();
-        // current iteration temp files
-        if (compressFiles == 0) {
-          inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
-        } else {
-          inStream_sentsCurrIt = new GZIPInputStream(new FileInputStream(tmpDirPrefix
-              + "temp.sents.it" + iteration + ".gz"));
-        }
-        inFile_sentsCurrIt = new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
-
-        // calculate SS for unseen candidates and write them to file
-        FileInputStream inStream_statsCurrIt_unknown = null;
-        BufferedReader inFile_statsCurrIt_unknown = null;
-
-        if (!statsCurrIt_exists && newCandidatesAdded[iteration] > 0) {
-          // create the file...
-          evalMetric.createSuffStatsFile(tmpDirPrefix + "temp.currIt.unknownCands", tmpDirPrefix
-              + "temp.currIt.unknownIndices", tmpDirPrefix + "temp.stats.unknown", sizeOfNBest);
-
-          // ...and open it
-          inStream_statsCurrIt_unknown = new FileInputStream(tmpDirPrefix + "temp.stats.unknown");
-          inFile_statsCurrIt_unknown = new BufferedReader(new InputStreamReader(
-              inStream_statsCurrIt_unknown, "utf8"));
-        }
-
-        // open mergedKnown file
-        // newly created by the big loop above
-        FileInputStream instream_statsMergedKnown = new FileInputStream(tmpDirPrefix
-            + "temp.stats.mergedKnown");
-        BufferedReader inFile_statsMergedKnown = new BufferedReader(new InputStreamReader(
-            instream_statsMergedKnown, "utf8"));
-
-        // num of features before observing new firing features from this iteration
-        numParamsOld = numParams;
-
-        for (int i = 0; i < numSentences; ++i) {
-          // reprocess candidates from previous iterations
-          for (int it = firstIt; it < iteration; ++it) {
-            for (int n = 0; n <= sizeOfNBest; ++n) {
-              sents_str = inFile_sents[it].readLine();
-              stats_str = inFile_stats[it].readLine();
-
-              if (sents_str.equals("||||||")) {
-                n = sizeOfNBest + 1;
-              } else if (!existingCandStats.containsKey(sents_str)) {
-                existingCandStats.put(sents_str, stats_str);
-              } // if unseen candidate
-            } // for (n)
-          } // for (it)
-
-          // copy relevant portion from mergedKnown to the merged file
-          String line_mergedKnown = inFile_statsMergedKnown.readLine();
-          while (!line_mergedKnown.equals("||||||")) {
-            outFile_statsMerged.println(line_mergedKnown);
-            line_mergedKnown = inFile_statsMergedKnown.readLine();
-          }
-
-          int[] stats = new int[suffStatsCount];
-
-          for (int n = 0; n <= sizeOfNBest; ++n) {
-            sents_str = inFile_sentsCurrIt.readLine();
-            feats_str = inFile_featsCurrIt.readLine();
-
-            if (sents_str.equals("||||||")) {
-              n = sizeOfNBest + 1;
-            } else if (!existingCandStats.containsKey(sents_str)) {
-
-              if (!statsCurrIt_exists) {
-                stats_str = inFile_statsCurrIt_unknown.readLine();
-
-                String[] temp_stats = stats_str.split("\\s+");
-                for (int s = 0; s < suffStatsCount; ++s) {
-                  stats[s] = Integer.parseInt(temp_stats[s]);
-                }
-
-                outFile_statsCurrIt.println(stats_str);
-              } else {
-                stats_str = inFile_statsCurrIt.readLine();
-
-                String[] temp_stats = stats_str.split("\\s+");
-                for (int s = 0; s < suffStatsCount; ++s) {
-                  stats[s] = Integer.parseInt(temp_stats[s]);
-                }
-              }
-
-              outFile_statsMerged.println(stats_str);
-
-              // save feats & stats
-              // System.out.println(sents_str+" "+feats_str);
-
-              feat_hash[i].put(sents_str, feats_str);
-              stats_hash[i].put(sents_str, stats_str);
-
-              featVal_str = feats_str.split("\\s+");
-
-              if (feats_str.indexOf('=') != -1) {
-                for (String featurePair : featVal_str) {
-                  String[] pair = featurePair.split("=");
-                  String name = pair[0];
-                  int featId = Vocabulary.id(name);
-                  // need to identify newly fired feats here
-                  if (featId > numParams) {
-                    ++numParams;
-                    lambda.add(new Double(0));
-                  }
-                }
-              }
-              existingCandStats.put(sents_str, stats_str);
-              candCount[i] += 1;
-
-              // newCandidatesAdded[iteration] += 1;
-              // moved to code above detecting new candidates
-            } else {
-              if (statsCurrIt_exists)
-                inFile_statsCurrIt.readLine();
-              else {
-                // write SS to outFile_statsCurrIt
-                stats_str = existingCandStats.get(sents_str);
-                outFile_statsCurrIt.println(stats_str);
-              }
-            }
-
-          } // for (n)
-
-          // now d = sizeUnknown_currIt[i] - 1
-
-          if (statsCurrIt_exists)
-            inFile_statsCurrIt.readLine();
-          else
-            outFile_statsCurrIt.println("||||||");
-
-          existingCandStats.clear();
-          totalCandidateCount += candCount[i];
-
-          // output sentence progress
-          if ((i + 1) % 500 == 0) {
-            print((i + 1) + "\n" + "            ", 1);
-          } else if ((i + 1) % 100 == 0) {
-            print("+", 1);
-          } else if ((i + 1) % 25 == 0) {
-            print(".", 1);
-          }
-
-        } // for (i)
-
-        inFile_statsMergedKnown.close();
-        outFile_statsMerged.close();
-
-        // for testing
-        /*
-         * int total_sent = 0; for( int i=0; i<numSentences; i++ ) {
-         * System.out.println(feat_hash[i].size()+" "+candCount[i]); total_sent +=
-         * feat_hash[i].size(); feat_hash[i].clear(); }
-         * System.out.println("----------------total sent: "+total_sent); total_sent = 0; for( int
-         * i=0; i<numSentences; i++ ) { System.out.println(stats_hash[i].size()+" "+candCount[i]);
-         * total_sent += stats_hash[i].size(); stats_hash[i].clear(); }
-         * System.out.println("*****************total sent: "+total_sent);
-         */
-
-        println("", 1); // finish progress line
-
-        for (int it = firstIt; it < iteration; ++it) {
-          inFile_sents[it].close();
-          inFile_feats[it].close();
-          inFile_stats[it].close();
-        }
-
-        inFile_sentsCurrIt.close();
-        inFile_featsCurrIt.close();
-        if (statsCurrIt_exists)
-          inFile_statsCurrIt.close();
-        else
-          outFile_statsCurrIt.close();
-
-        if (compressFiles == 1 && !statsCurrIt_exists) {
-          gzipFile(tmpDirPrefix + "temp.stats.it" + iteration);
-        }
-
-        // clear temp files
-        deleteFile(tmpDirPrefix + "temp.currIt.unknownCands");
-        deleteFile(tmpDirPrefix + "temp.currIt.unknownIndices");
-        deleteFile(tmpDirPrefix + "temp.stats.unknown");
-        deleteFile(tmpDirPrefix + "temp.stats.mergedKnown");
-
-        // cleanupMemory();
-
-        println("Processed " + totalCandidateCount + " distinct candidates " + "(about "
-            + totalCandidateCount / numSentences + " per sentence):", 1);
-        for (int it = firstIt; it <= iteration; ++it) {
-          println("newCandidatesAdded[it=" + it + "] = " + newCandidatesAdded[it] + " (about "
-              + newCandidatesAdded[it] / numSentences + " per sentence)", 1);
-        }
-
-        println("", 1);
-
-        println("Number of features observed so far: " + numParams);
-        println("", 1);
-
-      } catch (FileNotFoundException e) {
-        System.err.println("FileNotFoundException in PROCore.run_single_iteration(6): "
-            + e.getMessage());
-        System.exit(99901);
-      } catch (IOException e) {
-        System.err.println("IOException in PROCore.run_single_iteration(6): " + e.getMessage());
-        System.exit(99902);
-      }
-
-      // n-best list converges
-      if (newCandidatesAdded[iteration] == 0) {
-        if (!oneModificationPerIteration) {
-          println("No new candidates added in this iteration; exiting PRO.", 1);
-          println("", 1);
-          println("---  PRO iteration #" + iteration + " ending @ " + (new Date()) + "  ---", 1);
-          println("", 1);
-          deleteFile(tmpDirPrefix + "temp.stats.merged");
-
-          if (returnBest) {
-            // note that bestLambda.size() <= lambda.size()
-            for (int p = 1; p < bestLambda.size(); ++p)
-              lambda.set(p, bestLambda.get(p));
-            // and set the rest of lambda to be 0
-            for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
-              lambda.set(p + bestLambda.size(), new Double(0));
-          }
-
-          return null; // this means that the old values should be kept by the caller
-        } else {
-          println("Note: No new candidates added in this iteration.", 1);
-        }
-      }
-
-      /************* start optimization **************/
-
-      /*
-       * for( int v=1; v<initialLambda[1].length; v++ ) System.out.print(initialLambda[1][v]+" ");
-       * System.exit(0);
-       */
-
-      Vector<String> output = new Vector<String>();
-
-      // note: initialLambda[] has length = numParamsOld
-      // augmented with new feature weights, initial values are 0
-      double[] initialLambdaNew = new double[1 + numParams];
-      System.arraycopy(initialLambda, 1, initialLambdaNew, 1, numParamsOld);
-
-      // finalLambda[] has length = numParams (considering new features)
-      double[] finalLambda = new double[1 + numParams];
-
-      Optimizer opt = new Optimizer(seed + iteration, isOptimizable, output, initialLambdaNew,
-          feat_hash, stats_hash, evalMetric, Tau, Xi, metricDiff, normalizationOptions,
-          classifierAlg, classifierParams);
-      finalLambda = opt.run_Optimizer();
-
-      if (returnBest) {
-        double metricScore = opt.getMetricScore();
-        if (!evalMetric.getToBeMinimized()) {
-          if (metricScore > prevMetricScore) {
-            prevMetricScore = metricScore;
-            for (int p = 1; p < bestLambda.size(); ++p)
-              bestLambda.set(p, finalLambda[p]);
-            if (1 + numParams > bestLambda.size()) {
-              for (int p = bestLambda.size(); p <= numParams; ++p)
-                bestLambda.add(p, finalLambda[p]);
-            }
-          }
-        } else {
-          if (metricScore < prevMetricScore) {
-            prevMetricScore = metricScore;
-            for (int p = 1; p < bestLambda.size(); ++p)
-              bestLambda.set(p, finalLambda[p]);
-            if (1 + numParams > bestLambda.size()) {
-              for (int p = bestLambda.size(); p <= numParams; ++p)
-                bestLambda.add(p, finalLambda[p]);
-            }
-          }
-        }
-      }
-
-      // System.out.println(finalLambda.length);
-      // for( int i=0; i<finalLambda.length-1; i++ )
-      // System.out.print(finalLambda[i+1]+" ");
-      // System.out.println();
-
-      /************* end optimization **************/
-
-      for (int i = 0; i < output.size(); i++)
-        println(output.get(i));
-
-      // check if any parameter has been updated
-      boolean anyParamChanged = false;
-      boolean anyParamChangedSignificantly = false;
-
-      for (int c = 1; c <= numParams; ++c) {
-        if (finalLambda[c] != lambda.get(c)) {
-          anyParamChanged = true;
-        }
-        if (Math.abs(finalLambda[c] - lambda.get(c)) > stopSigValue) {
-          anyParamChangedSignificantly = true;
-        }
-      }
-
-      // System.arraycopy(finalLambda,1,lambda,1,numParams);
-
-      println("---  PRO iteration #" + iteration + " ending @ " + (new Date()) + "  ---", 1);
-      println("", 1);
-
-      if (!anyParamChanged) {
-        println("No parameter value changed in this iteration; exiting PRO.", 1);
-        println("", 1);
-        break; // exit for (iteration) loop preemptively
-      }
-
-      // was an early stopping criterion satisfied?
-      boolean critSatisfied = false;
-      if (!anyParamChangedSignificantly && stopSigValue >= 0) {
-        println("Note: No parameter value changed significantly " + "(i.e. by more than "
-            + stopSigValue + ") in this iteration.", 1);
-        critSatisfied = true;
-      }
-
-      if (critSatisfied) {
-        ++earlyStop;
-        println("", 1);
-      } else {
-        earlyStop = 0;
-      }
-
-      // if min number of iterations executed, investigate if early exit should happen
-      if (iteration >= minIts && earlyStop >= stopMinIts) {
-        println("Some early stopping criteria has been observed " + "in " + stopMinIts
-            + " consecutive iterations; exiting PRO.", 1);
-        println("", 1);
-
-        if (returnBest) {
-          // note that numParams >= bestLamba.size()-1 here!
-          for (int f = 1; f <= bestLambda.size() - 1; ++f)
-            lambda.set(f, bestLambda.get(f));
-        } else {
-          for (int f = 1; f <= numParams; ++f)
-            lambda.set(f, finalLambda[f]);
-        }
-
-        break; // exit for (iteration) loop preemptively
-      }
-
-      // if max number of iterations executed, exit
-      if (iteration >= maxIts) {
-        println("Maximum number of PRO iterations reached; exiting PRO.", 1);
-        println("", 1);
-
-        if (returnBest) {
-          // note that numParams >= bestLamba.size()-1 here!
-          for (int f = 1; f <= bestLambda.size() - 1; ++f)
-            lambda.set(f, bestLambda.get(f));
-        } else {
-          for (int f = 1; f <= numParams; ++f)
-            lambda.set(f, finalLambda[f]);
-        }
-
-        break; // exit for (iteration) loop
-      }
-
-      // use the new wt vector to decode the next iteration
-      // (interpolation with previous wt vector)
-      for (int i = 1; i <= numParams; i++)
-        lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
-
-      println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
-      println("", 1);
-
-      // printMemoryUsage();
-      for (int i = 0; i < numSentences; ++i) {
-        suffStats_array[i].clear();
-      }
-      // cleanupMemory();
-      // println("",2);
-
-      retA[2] = 0; // i.e. this should NOT be the last iteration
-      done = true;
-
-    } // while (!done) // NOTE: this "loop" will only be carried out once
-
-    // delete .temp.stats.merged file, since it is not needed in the next
-    // iteration (it will be recreated from scratch)
-    deleteFile(tmpDirPrefix + "temp.stats.merged");
-
-    retA[0] = FINAL_score;
-    retA[1] = earlyStop;
-    return retA;
-
-  } // run_single_iteration
-
-  private String lambdaToString(ArrayList<Double> lambdaA) {
-    String retStr = "{";
-    int featToPrint = numParams > 15 ? 15 : numParams;
-    // print at most the first 15 features
-
-    retStr += "(listing the first " + featToPrint + " lambdas)";
-    for (int c = 1; c <= featToPrint - 1; ++c) {
-      retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
-    }
-    retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
-
-    return retStr;
-  }
-
-  private String[] run_decoder(int iteration) {
-    String[] retSA = new String[2];
-
-    // retsa saves the output file name(nbest-file)
-    // and the decoder type
-
-    // [0] name of file to be processed
-    // [1] indicates how the output file was obtained:
-    // 1: external decoder
-    // 2: fake decoder
-    // 3: internal decoder
-
-    // use fake decoder
-    if (fakeFileNameTemplate != null
-        && fileExists(fakeFileNamePrefix + iteration + fakeFileNameSuffix)) {
-      String fakeFileName = fakeFileNamePrefix + iteration + fakeFileNameSuffix;
-      println("Not running decoder; using " + fakeFileName + " instead.", 1);
-      /*
-       * if (fakeFileName.endsWith(".gz")) { copyFile(fakeFileName,decoderOutFileName+".gz");
-       * gunzipFile(decoderOutFileName+".gz"); } else { copyFile(fakeFileName,decoderOutFileName); }
-       */
-      retSA[0] = fakeFileName;
-      retSA[1] = "2";
-
-    } else {
-      println("Running external decoder...", 1);
-
-      try {
-        ArrayList<String> cmd = new ArrayList<String>();
-        cmd.add(decoderCommandFileName);
-
-        if (passIterationToDecoder)
-          cmd.add(Integer.toString(iteration));
-
-        ProcessBuilder pb = new ProcessBuilder(cmd);
-        // this merges the error and output streams of the subprocess
-        pb.redirectErrorStream(true);
-        Process p = pb.start();
-
-        // capture the sub-command's output
-        new StreamGobbler(p.getInputStream(), decVerbosity).start();
-
-        int decStatus = p.waitFor();
-        if (decStatus != validDecoderExitValue) {
-          println("Call to decoder returned " + decStatus + "; was expecting "
-              + validDecoderExitValue + ".");
-          System.exit(30);
-        }
-      } catch (IOException e) {
-        System.err.println("IOException in PROCore.run_decoder(int): " + e.getMessage());
-        System.exit(99902);
-      } catch (InterruptedException e) {
-        System.err.println("InterruptedException in PROCore.run_decoder(int): " + e.getMessage());
-        System.exit(99903);
-      }
-
-      retSA[0] = decoderOutFileName;
-      retSA[1] = "1";
-
-    }
-
-    return retSA;
-  }
-
-  private void produceTempFiles(String nbestFileName, int iteration) {
-    try {
-      String sentsFileName = tmpDirPrefix + "temp.sents.it" + iteration;
-      String featsFileName = tmpDirPrefix + "temp.feats.it" + iteration;
-
-      FileOutputStream outStream_sents = new FileOutputStream(sentsFileName, false);
-      OutputStreamWriter outStreamWriter_sents = new OutputStreamWriter(outStream_sents, "utf8");
-      BufferedWriter outFile_sents = new BufferedWriter(outStreamWriter_sents);
-
-      PrintWriter outFile_feats = new PrintWriter(featsFileName);
-
-      InputStream inStream_nbest = null;
-      if (nbestFileName.endsWith(".gz")) {
-        inStream_nbest = new GZIPInputStream(new FileInputStream(nbestFileName));
-      } else {
-        inStream_nbest = new FileInputStream(nbestFileName);
-      }
-      BufferedReader inFile_nbest = new BufferedReader(
-          new InputStreamReader(inStream_nbest, "utf8"));
-
-      String line; // , prevLine;
-      String candidate_str = "";
-      String feats_str = "";
-
-      int i = 0;
-      int n = 0;
-      line = inFile_nbest.readLine();
-
-      while (line != null) {
-
-        /*
-         * line format:
-         * 
-         * i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val
-         * .*
-         */
-
-        // in a well formed file, we'd find the nth candidate for the ith sentence
-
-        int read_i = Integer.parseInt((line.substring(0, line.indexOf("|||"))).trim());
-
-        if (read_i != i) {
-          writeLine("||||||", outFile_sents);
-          outFile_feats.println("||||||");
-          n = 0;
-          ++i;
-        }
-
-        line = (line.substring(line.indexOf("|||") + 3)).trim(); // get rid of initial text
-
-        candidate_str = (line.substring(0, line.indexOf("|||"))).trim();
-        feats_str = (line.substring(line.indexOf("|||") + 3)).trim();
-        // get rid of candidate string
-
-        int junk_i = feats_str.indexOf("|||");
-        if (junk_i >= 0) {
-          feats_str = (feats_str.substring(0, junk_i)).trim();
-        }
-
-        writeLine(normalize(candidate_str, textNormMethod), outFile_sents);
-        outFile_feats.println(feats_str);
-
-        ++n;
-        if (n == sizeOfNBest) {
-          writeLine("||||||", outFile_sents);
-          outFile_feats.println("||||||");
-          n = 0;
-          ++i;
-        }
-
-        line = inFile_nbest.readLine();
-      }
-
-      if (i != numSentences) { // last sentence had too few candidates
-        writeLine("||||||", outFile_sents);
-        outFile_feats.println("||||||");
-      }
-
-      inFile_nbest.close();
-      outFile_sents.close();
-      outFile_feats.close();
-
-      if (compressFiles == 1) {
-        gzipFile(sentsFileName);
-        gzipFile(featsFileName);
-      }
-
-    } catch (FileNotFoundException e) {
-      System.err.println("FileNotFoundException in PROCore.produceTempFiles(int): "
-          + e.getMessage());
-      System.exit(99901);
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.produceTempFiles(int): " + e.getMessage());
-      System.exit(99902);
-    }
-
-  }
-
-  private void createConfigFile(ArrayList<Double> params, String cfgFileName,
-      String templateFileName) {
-    try {
-      // i.e. create cfgFileName, which is similar to templateFileName, but with
-      // params[] as parameter values
-
-      BufferedReader inFile = new BufferedReader(new FileReader(templateFileName));
-      PrintWriter outFile = new PrintWriter(cfgFileName);
-
-      BufferedReader inFeatDefFile = null;
-      PrintWriter outFeatDefFile = null;
-      int origFeatNum = 0; // feat num in the template file
-
-      String line = inFile.readLine();
-      while (line != null) {
-        int c_match = -1;
-        for (int c = 1; c <= numParams; ++c) {
-          if (line.startsWith(Vocabulary.word(c) + " ")) {
-            c_match = c;
-            ++origFeatNum;
-            break;
-          }
-        }
-
-        if (c_match == -1) {
-          outFile.println(line);
-        } else {
-          if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
-            outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
-        }
-
-        line = inFile.readLine();
-      }
-
-      // now append weights of new features
-      for (int c = origFeatNum + 1; c <= numParams; ++c) {
-        if (Math.abs(params.get(c).doubleValue()) > 1e-20)
-          outFile.println(Vocabulary.word(c) + " " + params.get(c));
-      }
-
-      inFile.close();
-      outFile.close();
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.createConfigFile(double[],String,String): "
-          + e.getMessage());
-      System.exit(99902);
-    }
-  }
-
-  private void processParamFile() {
-    // process parameter file
-    Scanner inFile_init = null;
-    try {
-      inFile_init = new Scanner(new FileReader(paramsFileName));
-    } catch (FileNotFoundException e) {
-      System.err.println("FileNotFoundException in PROCore.processParamFile(): " + e.getMessage());
-      System.exit(99901);
-    }
-
-    String dummy = "";
-
-    // initialize lambda[] and other related arrays
-    for (int c = 1; c <= numParams; ++c) {
-      // skip parameter name
-      while (!dummy.equals("|||")) {
-        dummy = inFile_init.next();
-      }
-
-      // read default value
-      lambda.set(c, inFile_init.nextDouble());
-      defaultLambda[c] = lambda.get(c).doubleValue();
-
-      // read isOptimizable
-      dummy = inFile_init.next();
-      if (dummy.equals("Opt")) {
-        isOptimizable[c] = true;
-      } else if (dummy.equals("Fix")) {
-        isOptimizable[c] = false;
-      } else {
-        println("Unknown isOptimizable string " + dummy + " (must be either Opt or Fix)");
-        System.exit(21);
-      }
-
-      if (!isOptimizable[c]) { // skip next two values
-        dummy = inFile_init.next();
-        dummy = inFile_init.next();
-        dummy = inFile_init.next();
-        dummy = inFile_init.next();
-      } else {
-        // the next two values are not used, only to be consistent with ZMERT's params file format
-        dummy = inFile_init.next();
-        dummy = inFile_init.next();
-        // set minRandValue[c] and maxRandValue[c] (range for random values)
-        dummy = inFile_init.next();
-        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
-          println("minRandValue[" + c + "] cannot be -Inf or +Inf!");
-          System.exit(21);
-        } else {
-          minRandValue[c] = Double.parseDouble(dummy);
-        }
-
-        dummy = inFile_init.next();
-        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
-          println("maxRandValue[" + c + "] cannot be -Inf or +Inf!");
-          System.exit(21);
-        } else {
-          maxRandValue[c] = Double.parseDouble(dummy);
-        }
-
-        // check for illogical values
-        if (minRandValue[c] > maxRandValue[c]) {
-          println("minRandValue[" + c + "]=" + minRandValue[c] + " > " + maxRandValue[c]
-              + "=maxRandValue[" + c + "]!");
-          System.exit(21);
-        }
-
-        // check for odd values
-        if (minRandValue[c] == maxRandValue[c]) {
-          println("Warning: lambda[" + c + "] has " + "minRandValue = maxRandValue = "
-              + minRandValue[c] + ".", 1);
-        }
-      } // if (!isOptimizable[c])
-
-      /*
-       * precision[c] = inFile_init.nextDouble(); if (precision[c] < 0) { println("precision[" + c +
-       * "]=" + precision[c] + " < 0!  Must be non-negative."); System.exit(21); }
-       */
-
-    }
-
-    // set normalizationOptions[]
-    String origLine = "";
-    while (origLine != null && origLine.length() == 0) {
-      origLine = inFile_init.nextLine();
-    }
-
-    // How should a lambda[] vector be normalized (before decoding)?
-    // nO[0] = 0: no normalization
-    // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
-    // nO[0] = 2: scale so that the maximum absolute value is nO[1]
-    // nO[0] = 3: scale so that the minimum absolute value is nO[1]
-    // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
-
-    // normalization = none
-    // normalization = absval 1 lm
-    // normalization = maxabsval 1
-    // normalization = minabsval 1
-    // normalization = LNorm 2 1
-
-    dummy = (origLine.substring(origLine.indexOf("=") + 1)).trim();
-    String[] dummyA = dummy.split("\\s+");
-
-    if (dummyA[0].equals("none")) {
-      normalizationOptions[0] = 0;
-    } else if (dummyA[0].equals("absval")) {
-      normalizationOptions[0] = 1;
-      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
-      String pName = dummyA[2];
-      for (int i = 3; i < dummyA.length; ++i) { // in case parameter name has multiple words
-        pName = pName + " " + dummyA[i];
-      }
-      normalizationOptions[2] = Vocabulary.id(pName);
-
-      if (normalizationOptions[1] <= 0) {
-        println("Value for the absval normalization method must be positive.");
-        System.exit(21);
-      }
-      if (normalizationOptions[2] == 0) {
-        println("Unrecognized feature name " + normalizationOptions[2]
-            + " for absval normalization method.", 1);
-        System.exit(21);
-      }
-    } else if (dummyA[0].equals("maxabsval")) {
-      normalizationOptions[0] = 2;
-      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
-      if (normalizationOptions[1] <= 0) {
-        println("Value for the maxabsval normalization method must be positive.");
-        System.exit(21);
-      }
-    } else if (dummyA[0].equals("minabsval")) {
-      normalizationOptions[0] = 3;
-      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
-      if (normalizationOptions[1] <= 0) {
-        println("Value for the minabsval normalization method must be positive.");
-        System.exit(21);
-      }
-    } else if (dummyA[0].equals("LNorm")) {
-      normalizationOptions[0] = 4;
-      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
-      normalizationOptions[2] = Double.parseDouble(dummyA[2]);
-      if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
-        println("Both values for the LNorm normalization method must be positive.");
-        System.exit(21);
-      }
-    } else {
-      println("Unrecognized normalization method " + dummyA[0] + "; "
-          + "must be one of none, absval, maxabsval, and LNorm.");
-      System.exit(21);
-    } // if (dummyA[0])
-
-    inFile_init.close();
-  } // processParamFile()
-
-  private void processDocInfo() {
-    // sets numDocuments and docOfSentence[]
-    docOfSentence = new int[numSentences];
-
-    if (docInfoFileName == null) {
-      for (int i = 0; i < numSentences; ++i)
-        docOfSentence[i] = 0;
-      numDocuments = 1;
-    } else {
-
-      try {
-
-        // 4 possible formats:
-        // 1) List of numbers, one per document, indicating # sentences in each document.
-        // 2) List of "docName size" pairs, one per document, indicating name of document and #
-        // sentences.
-        // 3) List of docName's, one per sentence, indicating which doument each sentence belongs
-        // to.
-        // 4) List of docName_number's, one per sentence, indicating which doument each sentence
-        // belongs to,
-        // and its order in that document. (can also use '-' instead of '_')
-
-        int docInfoSize = countNonEmptyLines(docInfoFileName);
-
-        if (docInfoSize < numSentences) { // format #1 or #2
-          numDocuments = docInfoSize;
-          int i = 0;
-
-          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
-          String line = inFile.readLine();
-          boolean format1 = (!(line.contains(" ")));
-
-          for (int doc = 0; doc < numDocuments; ++doc) {
-
-            if (doc != 0)
-              line = inFile.readLine();
-
-            int docSize = 0;
-            if (format1) {
-              docSize = Integer.parseInt(line);
-            } else {
-              docSize = Integer.parseInt(line.split("\\s+")[1]);
-            }
-
-            for (int i2 = 1; i2 <= docSize; ++i2) {
-              docOfSentence[i] = doc;
-              ++i;
-            }
-
-          }
-
-          // now i == numSentences
-
-          inFile.close();
-
-        } else if (docInfoSize == numSentences) { // format #3 or #4
-
-          boolean format3 = false;
-
-          HashSet<String> seenStrings = new HashSet<String>();
-          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
-          for (int i = 0; i < numSentences; ++i) {
-            // set format3 = true if a duplicate is found
-            String line = inFile.readLine();
-            if (seenStrings.contains(line))
-              format3 = true;
-            seenStrings.add(line);
-          }
-
-          inFile.close();
-
-          HashSet<String> seenDocNames = new HashSet<String>();
-          HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
-          // maps a document name to the order (0-indexed) in which it was seen
-
-          inFile = new BufferedReader(new FileReader(docInfoFileName));
-          for (int i = 0; i < numSentences; ++i) {
-            String line = inFile.readLine();
-
-            String docName = "";
-            if (format3) {
-              docName = line;
-            } else {
-              int sep_i = Math.max(line.lastIndexOf('_'), line.lastIndexOf('-'));
-              docName = line.substring(0, sep_i);
-            }
-
-            if (!seenDocNames.contains(docName)) {
-              seenDocNames.add(docName);
-              docOrder.put(docName, seenDocNames.size() - 1);
-            }
-
-            int docOrder_i = docOrder.get(docName);
-
-            docOfSentence[i] = docOrder_i;
-
-          }
-
-          inFile.close();
-
-          numDocuments = seenDocNames.size();
-
-        } else { // badly formatted
-
-        }
-
-      } catch (FileNotFoundException e) {
-        System.err.println("FileNotFoundException in PROCore.processDocInfo(): " + e.getMessage());
-        System.exit(99901);
-      } catch (IOException e) {
-        System.err.println("IOException in PROCore.processDocInfo(): " + e.getMessage());
-        System.exit(99902);
-      }
-    }
-
-  }
-
-  private boolean copyFile(String origFileName, String newFileName) {
-    try {
-      File inputFile = new File(origFileName);
-      File outputFile = new File(newFileName);
-
-      InputStream in = new FileInputStream(inputFile);
-      OutputStream out = new FileOutputStream(outputFile);
-
-      byte[] buffer = new byte[1024];
-      int len;
-      while ((len = in.read(buffer)) > 0) {
-        out.write(buffer, 0, len);
-      }
-      in.close();
-      out.close();
-
-      /*
-       * InputStream inStream = new FileInputStream(new File(origFileName)); BufferedReader inFile =
-       * new BufferedReader(new InputStreamReader(inStream, "utf8"));
-       * 
-       * FileOutputStream outStream = new FileOutputStream(newFileName, false); OutputStreamWriter
-       * outStreamWriter = new OutputStreamWriter(outStream, "utf8"); BufferedWriter outFile = new
-       * BufferedWriter(outStreamWriter);
-       * 
-       * String line; while(inFile.ready()) { line = inFile.readLine(); writeLine(line, outFile); }
-       * 
-       * inFile.close(); outFile.close();
-       */
-      return true;
-    } catch (FileNotFoundException e) {
-      System.err.println("FileNotFoundException in PROCore.copyFile(String,String): "
-          + e.getMessage());
-      return false;
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.copyFile(String,String): " + e.getMessage());
-      return false;
-    }
-  }
-
-  private void renameFile(String origFileName, String newFileName) {
-    if (fileExists(origFileName)) {
-      deleteFile(newFileName);
-      File oldFile = new File(origFileName);
-      File newFile = new File(newFileName);
-      if (!oldFile.renameTo(newFile)) {
-        println("Warning: attempt to rename " + origFileName + " to " + newFileName
-            + " was unsuccessful!", 1);
-      }
-    } else {
-      println("Warning: file " + origFileName + " does not exist! (in PROCore.renameFile)", 1);
-    }
-  }
-
-  private void deleteFile(String fileName) {
-    if (fileExists(fileName)) {
-      File fd = new File(fileName);
-      if (!fd.delete()) {
-        println("Warning: attempt to delete " + fileName + " was unsuccessful!", 1);
-      }
-    }
-  }
-
-  private void writeLine(String line, BufferedWriter writer) throws IOException {
-    writer.write(line, 0, line.length());
-    writer.newLine();
-    writer.flush();
-  }
-
-  // need to re-write to handle different forms of lambda
-  public void finish() {
-    if (myDecoder != null) {
-      myDecoder.cleanUp();
-    }
-
-    // create config file with final values
-    createConfigFile(lambda, decoderConfigFileName + ".PRO.final", decoderConfigFileName
-        + ".PRO.orig");
-
-    // delete current decoder config file and decoder output
-    deleteFile(decoderConfigFileName);
-    deleteFile(decoderOutFileName);
-
-    // restore original name for config file (name was changed
-    // in initialize() so it doesn't get overwritten)
-    renameFile(decoderConfigFileName + ".PRO.orig", decoderConfigFileName);
-
-    if (finalLambdaFileName != null) {
-      try {
-        PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
-        for (int c = 1; c <= numParams; ++c) {
-          outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
-        }
-        outFile_lambdas.close();
-
-      } catch (IOException e) {
-        System.err.println("IOException in PROCore.finish(): " + e.getMessage());
-        System.exit(99902);
-      }
-    }
-
-  }
-
-  private String[] cfgFileToArgsArray(String fileName) {
-    checkFile(fileName);
-
-    Vector<String> argsVector = new Vector<String>();
-
-    BufferedReader inFile = null;
-    try {
-      inFile = new BufferedReader(new FileReader(fileName));
-      String line, origLine;
-      do {
-        line = inFile.readLine();
-        origLine = line; // for error reporting purposes
-
-        if (line != null && line.length() > 0 && line.charAt(0) != '#') {
-
-          if (line.indexOf("#") != -1) { // discard comment
-            line = line.substring(0, line.indexOf("#"));
-          }
-
-          line = line.trim();
-
-          // now line should look like "-xxx XXX"
-
-          /*
-           * OBSOLETE MODIFICATION //SPECIAL HANDLING FOR PRO CLASSIFIER PARAMETERS String[] paramA
-           * = line.split("\\s+");
-           * 
-           * if( paramA[0].equals("-classifierParams") ) { String classifierParam = ""; for(int p=1;
-           * p<=paramA.length-1; p++) classifierParam += paramA[p]+" ";
-           * 
-           * if(paramA.length>=2) { String[] tmpParamA = new String[2]; tmpParamA[0] = paramA[0];
-           * tmpParamA[1] = classifierParam; paramA = tmpParamA; } else {
-           * println("Malformed line in config file:"); println(origLine); System.exit(70); } }//END
-           * MODIFICATION
-           */
-
-          // CMU MODIFICATION(FROM METEOR FOR ZMERT)
-          // Parse args
-          ArrayList<String> argList = new ArrayList<String>();
-          StringBuilder arg = new StringBuilder();
-          boolean quoted = false;
-          for (int i = 0; i < line.length(); i++) {
-            if (Character.isWhitespace(line.charAt(i))) {
-              if (quoted)
-                arg.append(line.charAt(i));
-              else if (arg.length() > 0) {
-                argList.add(arg.toString());
-                arg = new StringBuilder();
-              }
-            } else if (line.charAt(i) == '\'') {
-              if (quoted) {
-                argList.add(arg.toString());
-                arg = new StringBuilder();
-              }
-              quoted = !quoted;
-            } else
-              arg.append(line.charAt(i));
-          }
-          if (arg.length() > 0)
-            argList.add(arg.toString());
-          // Create paramA
-          String[] paramA = new String[argList.size()];
-          for (int i = 0; i < paramA.length; paramA[i] = argList.get(i++))
-            ;
-          // END CMU MODIFICATION
-
-          if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
-            argsVector.add(paramA[0]);
-            argsVector.add(paramA[1]);
-          } else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
-            // -m (metricName), -docSet are allowed to have extra optinos
-            for (int opt = 0; opt < paramA.length; ++opt) {
-              argsVector.add(paramA[opt]);
-            }
-          } else {
-            println("Malformed line in config file:");
-            println(origLine);
-            System.exit(70);
-          }
-
-        }
-      } while (line != null);
-
-      inFile.close();
-    } catch (FileNotFoundException e) {
-      println("PRO configuration file " + fileName + " was not found!");
-      System.err.println("FileNotFoundException in PROCore.cfgFileToArgsArray(String): "
-          + e.getMessage());
-      System.exit(99901);
-    } catch (IOException e) {
-      System.err.println("IOException in PROCore.cfgFileToArgsArray(String): " + e.getMessage());
-      System.exit(99902);
-    }
-
-    String[] argsArray = new String[argsVector.size()];
-
-    for (int i = 0; i < argsVector.size(); ++i) {
-      argsArray[i] = argsVector.elementAt(i);
-    }
-
-    return argsArray;
-  }
-
-  private void processArgsArray(String[] args) {
-    processArgsArray(args, true);
-  }
-
-  private void processArgsArray(String[] args, boolean firstTime) {
-    /* set default values */
-    // Relevant files
-    dirPrefix = null;
-    sourceFileName = null;
-    refFileName = "reference.txt";
-    refsPerSen = 1;
-    textNormMethod = 1;
-    paramsFileName = "params.txt";
-    docInfoFileName = null;
-    finalLambdaFileName = null;
-    // MERT specs
-    metricName = "BLEU";
-    metricName_display = metricName;
-    metricOptions = new String[2];
-    metricOptions[0] = "4";
-    metricOptions[1] = "closest";
-    docSubsetInfo = new int[7];
-    docSubsetInfo[0] = 0;
-    maxMERTIterations = 20;
-    prevMERTIterations = 20;
-    minMERTIterations = 5;
-    stopMinIts = 3;
-    stopSigValue = -1;
-    //
-    // /* possibly other early stopping criteria here */
-    //
-    numOptThreads = 1;
-    saveInterFiles = 3;
-    compressFiles = 0;
-    oneModificationPerIteration = false;
-    randInit = false;
-    seed = System.currentTimeMillis();
-    // useDisk = 2;
-    // Decoder specs
-    decoderCommandFileName = null;
-    passIterationToDecoder = false;
-    decoderOutFileName = "output.nbest";
-    validDecoderExitValue = 0;
-    decoderConfigFileName = "dec_cfg.txt";
-    sizeOfNBest = 100;
-    fakeFileNameTemplate = null;
-    fakeFileNamePrefix = null;
-    fakeFileNameSuffix = null;
-    // Output specs
-    verbosity = 1;
-    decVerbosity = 0;
-
-    int i = 0;
-
-    while (i < args.length) {
-      String option = args[i];
-      // Relevant files
-      if (option.equals("-dir")) {
-        dirPrefix = args[i + 1];
-      } else if (option.equals("-s")) {
-        sourceFileName = args[i + 1];
-      } else if (option.equals("-r")) {
-        refFileName = args[i + 1];
-      } else if (option.equals("-rps")) {
-        refsPerSen = Integer.parseInt(args[i + 1]);
-        if (refsPerSen < 1) {
-          println("refsPerSen must be positive.");
-          System.exit(10);
-        }
-      } else if (option.equals("-txtNrm")) {
-        textNormMethod = Integer.parseInt(args[i + 1]);
-        if (textNormMethod < 0 || textNormMethod > 4) {
-          println("textNormMethod should be between 0 and 4");
-          System.exit(10);
-        }
-      } else if (option.equals("-p")) {
-        paramsFileName = args[i + 1];
-      } else if (option.equals("-docInfo")) {
-        docInfoFileName = args[i + 1];
-      } else if (option.equals("-fin")) {
-        finalLambdaFileName = args[i + 1];
-        // MERT specs
-      } else if (option.equals("-m")) {
-        metricName = args[i + 1];
-        metricName_display = metricName;
-        if (EvaluationMetric.knownMetricName(metricName)) {
-          int optionCount = EvaluationMetric.metricOptionCount(metricName);
-          metricOptions = new String[optionCount];
-          for (int opt = 0; opt < optionCount; ++opt) {
-            metricOptions[opt] = args[i + opt + 2];
-          }
-          i += optionCount;
-        } else {
-          println("Unknown metric name " + metricName + ".");
-          System.exit(10);
-        }
-      } else if (option.equals("-docSet")) {
-        String method = args[i + 1];
-
-        if (method.equals("all")) {
-          docSubsetInfo[0] = 0;
-          i += 0;
-        } else if (method.equals("bottom")) {
-          String a = args[i + 2];
-          if (a.endsWith("d")) {
-            docSubsetInfo[0] = 1;
-            a = a.substring(0, a.indexOf("d"));
-          } else {
-            docSubsetInfo[0] = 2;
-            a = a.substring(0, a.indexOf("%"));
-          }
-          docSubsetInfo[5] = Integer.parseInt(a);
-          i += 1;
-        } else if (method.equals("top")) {
-          String a = args[i + 2];
-          if (a.endsWith("d")) {
-            docSubsetInfo[0] = 3;
-            a = a.substring(0, a.indexOf("d"));
-          } else {
-            docSubsetInfo[0] = 4;
-            a = a.substring(0, a.indexOf("%"));
-          }
-          docSubsetInfo[5] = Integer.parseInt(a);
-          i += 1;
-        } else if (method.equals("window")) {
-          String a1 = args[i + 2];
-          a1 = a1.substring(0, a1.indexOf("d")); // size of window
-          String a2 = args[i + 4];
-          if (a2.indexOf("p") > 0) {
-            docSubsetInfo[0] = 5;
-            a2 = a2.substring(0, a2.indexOf("p"));
-          } else {
-            docSubsetInfo[0] = 6;
-            a2 = a2.substring(0, a2.indexOf("r"));
-          }
-          docSubsetInfo[5] = Integer.parseInt(a1);
-          docSubsetInfo[6] = Integer.parseInt(a2);
-          i += 3;
-        } else {
-          println("Unknown docSet method " + method + ".");
-          System.exit(10);
-        }
-      } else if (option.equals("-maxIt")) {
-        maxMERTIterations = Integer.parseInt(args[i + 1]);
-        if (maxMERTIterations < 1) {
-          println("maxMERTIts must be positive.");
-          System.exit(10);
-        }
-      } else if (option.equals("-minIt")) {
-        minMERTIterations = Integer.parseInt(args[i + 1]);
-        if (minMERTIterations < 1) {
-          println("minMERTIts must be positive.");
-          System.exit(10);
-        }
-      } else if (option.equals("-prevIt")) {
-        prevMERTIterations = Integer.parseInt(args[i + 1]);
-        if (prevMERTIterations < 0) {
-          println("prevMERTIts must be non-negative.");
-          System.exit(10);
-        }
-      } else if (option.equals("-stopIt")) {
-        stopMinIts = Integer.parseInt(args[i + 1]);
-        if (stopMinIts < 1) {
-          println("stopMinIts must be positive.");
-          System.exit(10);
-        }
-      } else if (option.equals("-stopSig")) {
-        stopSigValue = Double.parseDouble(args[i + 1]);
-      }
-      //
-      // /* possibly other early stopping criteria here */
-      //
-      else if (option.equals("-thrCnt")) {
-        numOptThreads = Integer.parseInt(args[i + 1]);
-        if (numOptThreads < 1) {
-          println("threadCount must be positive.");
-          System.exit(10);
-        }
-      } else if (option.equals("-save")) {
-        saveInterFiles = Integer.parseInt(args[i + 1]);
-        if (saveInterFiles < 0 || saveInterFiles > 3) {
-          println("save should be between 0 and 3");
-          System.exit(10);
-        }
-      } else if (option.equals("-compress")) {
-        compressFiles = Integer.parseInt(args[i + 1]);
-        if (compressFiles < 0 || compressFiles > 1) {
-          println("compressFiles should be either 0 or 1");
-          System.exit(10);
-        }
-      } else if (option.equals("-opi")) {
-        int opi = Integer.parseInt(args[i + 1]);
-        if (opi == 1) {
-          oneModificationPerIteration = true;
-        } else if (opi == 0) {
-          oneModificationPerIteration = false;
-        } else {
-          println("oncePerIt must be either 0 or 1.");
-

<TRUNCATED>