Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/30 21:04:51 UTC

[06/17] incubator-joshua git commit: Merge branch 'master' into 7-with-master

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b0b70627/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java
index 4110c97,0000000..abed07a
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/MertCore.java
@@@ -1,3191 -1,0 +1,3048 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.zmert;
 +
 +import java.io.BufferedReader;
 +import java.io.BufferedWriter;
 +import java.io.File;
 +import java.io.FileInputStream;
 +import java.io.FileNotFoundException;
 +import java.io.FileOutputStream;
 +import java.io.FileReader;
 +import java.io.IOException;
 +import java.io.InputStream;
 +import java.io.InputStreamReader;
 +import java.io.ObjectInputStream;
 +import java.io.ObjectOutputStream;
 +import java.io.OutputStream;
 +import java.io.OutputStreamWriter;
 +import java.io.PrintWriter;
 +import java.text.DecimalFormat;
 +import java.util.ArrayList;
 +import java.util.Date;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Random;
 +import java.util.Scanner;
 +import java.util.TreeSet;
 +import java.util.Vector;
 +import java.util.concurrent.ConcurrentHashMap;
 +import java.util.concurrent.ExecutorService;
 +import java.util.concurrent.Executors;
 +import java.util.concurrent.Semaphore;
 +import java.util.zip.GZIPInputStream;
 +import java.util.zip.GZIPOutputStream;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.metrics.EvaluationMetric;
 +import org.apache.joshua.util.StreamGobbler;
++import org.apache.joshua.util.io.ExistingUTF8EncodedTextFile;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * This code was originally written by Omar Zaidan.  In September of 2012, it was augmented to support
 + * a sparse feature implementation.
-  * 
++ *
 + * @author Omar Zaidan
 + */
 +
 +public class MertCore {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(MertCore.class);
 +
 +  private final JoshuaConfiguration joshuaConfiguration;
 +  private TreeSet<Integer>[] indicesOfInterest_all;
 +
 +  private final static DecimalFormat f4 = new DecimalFormat("###0.0000");
-   private final Runtime myRuntime = Runtime.getRuntime();
 +
 +  private final static double NegInf = (-1.0 / 0.0);
 +  private final static double PosInf = (+1.0 / 0.0);
 +  private final static double epsilon = 1.0 / 1000000;
 +
 +  private int verbosity; // anything of priority <= verbosity will be printed
 +                         // (lower value for priority means more important)
 +
 +  private Random randGen;
 +  private int generatedRands;
 +
 +  private int numSentences;
 +  // number of sentences in the dev set
 +  // (aka the "MERT training" set)
 +
 +  private int numDocuments;
 +  // number of documents in the dev set
 +  // this should be 1, unless doing doc-level optimization
 +
 +  private int[] docOfSentence;
 +  // docOfSentence[i] stores which document contains the i'th sentence.
 +  // docOfSentence is 0-indexed, as are the documents (i.e. first doc is indexed 0)
 +
 +  private int[] docSubsetInfo;
 +  // stores information regarding which subset of the documents are evaluated
 +  // [0]: method (0-6)
 +  // [1]: first (1-indexed)
 +  // [2]: last (1-indexed)
 +  // [3]: size
 +  // [4]: center
 +  // [5]: arg1
 +  // [6]: arg2
 +  // [1-6] are 0 for method 0, [6] is 0 for methods 1-4 as well
 +  // only [1] and [2] are needed for optimization. The rest are only needed for an output message.
 +
 +  private int refsPerSen;
 +  // number of reference translations per sentence
 +
 +  private int textNormMethod;
 +  // 0: no normalization, 1: "NIST-style" tokenization, and also rejoin 'm, 're, *'s, 've, 'll, 'd,
 +  // and n't,
 +  // 2: apply 1 and also rejoin dashes between letters, 3: apply 1 and also drop non-ASCII
 +  // characters
 +  // 4: apply 1+2+3
 +
 +  private int numParams;
 +  // number of features for the log-linear model
 +
 +  private double[] normalizationOptions;
 +  // How should a lambda[] vector be normalized (before decoding)?
 +  // nO[0] = 0: no normalization
 +  // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
 +  // nO[0] = 2: scale so that the maximum absolute value is nO[1]
 +  // nO[0] = 3: scale so that the minimum absolute value is nO[1]
 +  // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
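 +  // For example (purely illustrative values): nO = {2, 0.5, 0} would rescale lambda[]
 +  // so that its largest absolute value becomes 0.5.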
 +
 +  /* *********************************************************** */
 +  /* NOTE: indexing starts at 1 in the following few arrays: */
 +  /* *********************************************************** */
 +
 +  private String[] paramNames;
 +  // feature names, needed to read/create config file
 +
 +  private double[] lambda;
 +  // the current weight vector. NOTE: indexing starts at 1.
 +
 +  private boolean[] isOptimizable;
 +  // isOptimizable[c] = true iff lambda[c] should be optimized
 +
 +  private double[] minThValue;
 +  private double[] maxThValue;
 +  // when investigating thresholds along the lambda[c] dimension, only values
 +  // in the [minThValue[c],maxThValue[c]] range will be considered.
 +  // (*) minThValue and maxThValue can be real values as well as -Infinity and +Infinity
 +  // (coded as -Inf and +Inf, respectively, in an input file)
 +
 +  private double[] minRandValue;
 +  private double[] maxRandValue;
 +  // when choosing a random value for the lambda[c] parameter, it will be
 +  // chosen from the [minRandValue[c],maxRandValue[c]] range.
 +  // (*) minRandValue and maxRandValue must be real values, but not -Inf or +Inf
 +
 +  private int damianos_method;
 +  private double damianos_param;
 +  private double damianos_mult;
 +
 +  private double[] defaultLambda;
 +  // "default" parameter values; simply the values read in the parameter file
 +
 +  /* *********************************************************** */
 +  /* *********************************************************** */
 +
 +  private Decoder myDecoder;
 +  // COMMENT OUT if decoder is not Joshua
 +
 +  private String decoderCommand;
 +  // the command that runs the decoder; read from decoderCommandFileName
 +
 +  private int decVerbosity;
 +  // verbosity level for decoder output. If 0, decoder output is ignored.
 +  // If 1, decoder output is printed.
 +
 +  private int validDecoderExitValue;
 +  // return value from running the decoder command that indicates success
 +
 +  private int numOptThreads;
 +  // number of threads to run things in parallel
 +
 +  private int saveInterFiles;
 +  // 0: nothing, 1: only configs, 2: only n-bests, 3: both configs and n-bests
 +
 +  private int compressFiles;
 +  // should Z-MERT gzip the large files? If 0, no compression takes place.
 +  // If 1, compression is performed on: decoder output files, temp sents files,
 +  // and temp feats files.
 +
 +  private int sizeOfNBest;
 +  // size of N-best list generated by decoder at each iteration
 +  // (aka simply N, but N is a bad variable name)
 +
 +  private long seed;
 +  // seed used to create random number generators
 +
 +  private boolean randInit;
 +  // if true, parameters are initialized randomly. If false, parameters
 +  // are initialized using values from parameter file.
 +
 +  private int initsPerIt;
 +  // number of intermediate initial points per iteration
 +
 +  private int maxMERTIterations, minMERTIterations, prevMERTIterations;
 +  // max: maximum number of MERT iterations
 +  // min: minimum number of MERT iterations before an early MERT exit
 +  // prev: number of previous MERT iterations from which to consider candidates (in addition to
 +  // the candidates from the current iteration)
 +
 +  private double stopSigValue;
 +  // early MERT exit if no weight changes by more than stopSigValue
 +  // (but see minMERTIterations above and stopMinIts below)
 +
 +  private int stopMinIts;
 +  // some early stopping criterion must be satisfied in stopMinIts *consecutive* iterations
 +  // before an early exit (but see minMERTIterations above)
 +
 +  private boolean oneModificationPerIteration;
 +  // if true, each MERT iteration performs at most one parameter modification.
 +  // If false, a new MERT iteration starts (i.e. a new N-best list is
 +  // generated) only after the previous iteration reaches a local maximum.
 +
 +  private String metricName;
 +  // name of evaluation metric optimized by MERT
 +
 +  private String metricName_display;
 +  // name of evaluation metric optimized by MERT, possibly with "doc-level " prefixed
 +
 +  private String[] metricOptions;
 +  // options for the evaluation metric (e.g. for BLEU, maxGramLength and effLengthMethod)
 +
 +  private EvaluationMetric evalMetric;
 +  // the evaluation metric used by MERT
 +
 +  private int suffStatsCount;
 +  // number of sufficient statistics for the evaluation metric
 +
 +  private String tmpDirPrefix;
 +  // prefix for the ZMERT.temp.* files
 +
 +  private boolean passIterationToDecoder;
 +  // should the iteration number be passed as an argument to decoderCommandFileName?
 +  // If true (config value 1), the iteration number is passed; if false (0), the decoder is launched with no arguments.
 +
 +  private String dirPrefix; // where are all these files located?
 +  private String paramsFileName, docInfoFileName, finalLambdaFileName;
 +  private String sourceFileName, refFileName, decoderOutFileName;
 +  private String decoderConfigFileName, decoderCommandFileName;
 +  private String fakeFileNameTemplate, fakeFileNamePrefix, fakeFileNameSuffix;
 +
 +  // e.g. output.it[1-x].someOldRun would be specified as:
 +  // output.it?.someOldRun
 +  // and we'd have prefix = "output.it" and suffix = ".someOldRun"
 +
 +  // private int useDisk;
 +
-   public MertCore(JoshuaConfiguration joshuaConfiguration) 
++  public MertCore(JoshuaConfiguration joshuaConfiguration)
 +  {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +  }
 +
-   public MertCore(String[] args, JoshuaConfiguration joshuaConfiguration) {
++  public MertCore(String[] args, JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    EvaluationMetric.set_knownMetrics();
 +    processArgsArray(args);
 +    initialize(0);
 +  }
 +
-   public MertCore(String configFileName,JoshuaConfiguration joshuaConfiguration) {
++  public MertCore(String configFileName,JoshuaConfiguration joshuaConfiguration) throws FileNotFoundException, IOException {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    EvaluationMetric.set_knownMetrics();
 +    processArgsArray(cfgFileToArgsArray(configFileName));
 +    initialize(0);
 +  }
 +
-   private void initialize(int randsToSkip) {
++  private void initialize(int randsToSkip) throws FileNotFoundException, IOException {
 +    println("NegInf: " + NegInf + ", PosInf: " + PosInf + ", epsilon: " + epsilon, 4);
 +
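 +    // Discard the first randsToSkip draws so that a re-initialization (randsToSkip > 0)
 +    // leaves the generator in the same state as the run that already consumed them.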
 +    randGen = new Random(seed);
 +    for (int r = 1; r <= randsToSkip; ++r) {
 +      randGen.nextDouble();
 +    }
 +    generatedRands = randsToSkip;
 +
 +    if (randsToSkip == 0) {
 +      println("----------------------------------------------------", 1);
 +      println("Initializing...", 1);
 +      println("----------------------------------------------------", 1);
 +      println("", 1);
 +
 +      println("Random number generator initialized using seed: " + seed, 1);
 +      println("", 1);
 +    }
 +
 +    if (refsPerSen > 1) {
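 +      // Multiple reference files may be named either <refFileName>0, 1, ... or <refFileName>.0, .1, ...;
 +      // both naming schemes are checked here (and again when the files are read below).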
 +      String refFile = refFileName + "0";
 +      if (! new File(refFile).exists())
 +        refFile = refFileName + ".0";
 +      if (! new File(refFile).exists()) {
-         throw new RuntimeException(String.format("* FATAL: can't find first reference file '%s{0,.0}'", refFileName));
++        throw new IOException(String.format("* FATAL: can't find first reference file '%s{0,.0}'", refFileName));
 +      }
 +
-       numSentences = countLines(refFile);
++      numSentences = new ExistingUTF8EncodedTextFile(refFile).getNumberOfLines();
 +    } else {
-       numSentences = countLines(refFileName);
++      numSentences = new ExistingUTF8EncodedTextFile(refFileName).getNumberOfLines();
 +    }
 +
 +    processDocInfo();
 +    // sets numDocuments and docOfSentence[]
 +
 +    if (numDocuments > 1) metricName_display = "doc-level " + metricName;
 +
 +    set_docSubsetInfo(docSubsetInfo);
 +
 +
 +
-     numParams = countNonEmptyLines(paramsFileName) - 1;
++    numParams = new ExistingUTF8EncodedTextFile(paramsFileName).getNumberOfNonEmptyLines() - 1;
 +    // the parameter file contains one line per parameter
 +    // and one line for the normalization method
 +
 +
 +    paramNames = new String[1 + numParams];
 +    lambda = new double[1 + numParams]; // indexing starts at 1 in these arrays
 +    isOptimizable = new boolean[1 + numParams];
 +    minThValue = new double[1 + numParams];
 +    maxThValue = new double[1 + numParams];
 +    minRandValue = new double[1 + numParams];
 +    maxRandValue = new double[1 + numParams];
 +    // precision = new double[1+numParams];
 +    defaultLambda = new double[1 + numParams];
 +    normalizationOptions = new double[3];
 +
 +    try {
 +      // read parameter names
 +      BufferedReader inFile_names = new BufferedReader(new FileReader(paramsFileName));
 +
 +      for (int c = 1; c <= numParams; ++c) {
 +        String line = "";
 +        while (line != null && line.length() == 0) { // skip empty lines
 +          line = inFile_names.readLine();
 +        }
 +        String paramName = (line.substring(0, line.indexOf("|||"))).trim();
 +        paramNames[c] = paramName;
 +      }
 +
 +      inFile_names.close();
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +
 +    processParamFile();
 +    // sets the arrays declared just above
 +
 +    // SentenceInfo.createV(); // uncomment ONLY IF using vocabulary implementation of SentenceInfo
 +
 +
 +    String[][] refSentences = new String[numSentences][refsPerSen];
 +
 +    try {
 +
 +      // read in reference sentences
 +      BufferedReader reference_readers[] = new BufferedReader[refsPerSen];
 +      if (refsPerSen == 1) {
 +        reference_readers[0] = new BufferedReader(new InputStreamReader(new FileInputStream(new File(refFileName)), "utf8"));
 +      } else {
 +        for (int i = 0; i < refsPerSen; i++) {
 +          String refFile = refFileName + i;
 +          if (! new File(refFile).exists())
 +            refFile = refFileName + "." + i;
 +          if (! new File(refFile).exists()) {
 +            throw new RuntimeException(String.format("* FATAL: can't find reference file '%s'", refFile));
 +          }
 +
 +          reference_readers[i] = new BufferedReader(new InputStreamReader(new FileInputStream(new File(refFile)), "utf8"));
 +        }
 +      }
-         
++
 +      for (int i = 0; i < numSentences; ++i) {
 +        for (int r = 0; r < refsPerSen; ++r) {
 +          // read the rth reference translation for the ith sentence
 +          refSentences[i][r] = normalize(reference_readers[r].readLine(), textNormMethod);
 +        }
 +      }
 +
 +      // close all the reference files
-       for (int i = 0; i < refsPerSen; i++) 
++      for (int i = 0; i < refsPerSen; i++)
 +        reference_readers[i].close();
 +
 +      // read in decoder command, if any
 +      decoderCommand = null;
 +      if (decoderCommandFileName != null) {
 +        if (fileExists(decoderCommandFileName)) {
 +          BufferedReader inFile_comm = new BufferedReader(new FileReader(decoderCommandFileName));
 +          decoderCommand = inFile_comm.readLine();
 +          inFile_comm.close();
 +        }
 +      }
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +
 +
 +    // set static data members for the EvaluationMetric class
 +    EvaluationMetric.set_numSentences(numSentences);
 +    EvaluationMetric.set_numDocuments(numDocuments);
 +    EvaluationMetric.set_refsPerSen(refsPerSen);
 +    EvaluationMetric.set_refSentences(refSentences);
 +    EvaluationMetric.set_tmpDirPrefix(tmpDirPrefix);
 +
 +    evalMetric = EvaluationMetric.getMetric(metricName, metricOptions);
 +
 +    suffStatsCount = evalMetric.get_suffStatsCount();
 +
 +    // set static data members for the IntermediateOptimizer class
 +    IntermediateOptimizer.set_MERTparams(numSentences, numDocuments, docOfSentence, docSubsetInfo,
 +        numParams, normalizationOptions, isOptimizable, minThValue, maxThValue,
 +        oneModificationPerIteration, evalMetric, tmpDirPrefix, verbosity);
 +
 +
 +
 +    if (randsToSkip == 0) { // i.e. first iteration
 +      println("Number of sentences: " + numSentences, 1);
 +      println("Number of documents: " + numDocuments, 1);
 +      println("Optimizing " + metricName_display, 1);
 +
 +      print("docSubsetInfo: {", 1);
 +      for (int f = 0; f < 6; ++f)
 +        print(docSubsetInfo[f] + ", ", 1);
 +      println(docSubsetInfo[6] + "}", 1);
 +
 +      println("Number of features: " + numParams, 1);
 +      print("Feature names: {", 1);
 +      for (int c = 1; c <= numParams; ++c) {
 +        print("\"" + paramNames[c] + "\"", 1);
 +        if (c < numParams) print(",", 1);
 +      }
 +      println("}", 1);
 +      println("", 1);
 +
 +      println("c    Default value\tOptimizable?\tCrit. val. range\tRand. val. range", 1);
 +
 +      for (int c = 1; c <= numParams; ++c) {
 +        print(c + "     " + f4.format(lambda[c]) + "\t\t", 1);
 +        if (!isOptimizable[c]) {
 +          println(" No", 1);
 +        } else {
 +          print(" Yes\t\t", 1);
 +          // print("[" + minThValue[c] + "," + maxThValue[c] + "] @ " + precision[c] +
 +          // " precision",1);
 +          print(" [" + minThValue[c] + "," + maxThValue[c] + "]", 1);
 +          print("\t\t", 1);
 +          print(" [" + minRandValue[c] + "," + maxRandValue[c] + "]", 1);
 +          println("", 1);
 +        }
 +      }
 +
 +      println("", 1);
 +      print("Weight vector normalization method: ", 1);
 +      if (normalizationOptions[0] == 0) {
 +        println("none.", 1);
 +      } else if (normalizationOptions[0] == 1) {
 +        println("weights will be scaled so that the \"" + paramNames[(int) normalizationOptions[1]]
 +            + "\" weight has an absolute value of " + normalizationOptions[2] + ".", 1);
 +      } else if (normalizationOptions[0] == 2) {
 +        println("weights will be scaled so that the maximum absolute value is "
 +            + normalizationOptions[1] + ".", 1);
 +      } else if (normalizationOptions[0] == 3) {
 +        println("weights will be scaled so that the minimum absolute value is "
 +            + normalizationOptions[1] + ".", 1);
 +      } else if (normalizationOptions[0] == 4) {
 +        println("weights will be scaled so that the L-" + normalizationOptions[1] + " norm is "
 +            + normalizationOptions[2] + ".", 1);
 +      }
 +
 +      println("", 1);
 +
 +      println("----------------------------------------------------", 1);
 +      println("", 1);
 +
 +      // rename original config file so it doesn't get overwritten
 +      // (original name will be restored in finish())
 +      renameFile(decoderConfigFileName, decoderConfigFileName + ".ZMERT.orig");
 +
 +    } // if (randsToSkip == 0)
 +
 +
 +    if (decoderCommand == null && fakeFileNameTemplate == null) {
 +      println("Loading Joshua decoder...", 1);
 +      myDecoder = new Decoder(joshuaConfiguration);
 +      println("...finished loading @ " + (new Date()), 1);
 +      println("");
 +    } else {
 +      myDecoder = null;
 +    }
 +
 +
 +
 +    @SuppressWarnings("unchecked")
 +    TreeSet<Integer>[] temp_TSA = new TreeSet[numSentences];
 +    indicesOfInterest_all = temp_TSA;
 +
 +    for (int i = 0; i < numSentences; ++i) {
 +      indicesOfInterest_all[i] = new TreeSet<Integer>();
 +    }
 +
 +
 +  } // void initialize(...)
 +
 +  public void run_MERT() {
 +    run_MERT(minMERTIterations, maxMERTIterations, prevMERTIterations);
 +  }
 +
 +  public void run_MERT(int minIts, int maxIts, int prevIts) {
 +    println("----------------------------------------------------", 1);
 +    println("Z-MERT run started @ " + (new Date()), 1);
 +    // printMemoryUsage();
 +    println("----------------------------------------------------", 1);
 +    println("", 1);
 +
 +    if (randInit) {
 +      println("Initializing lambda[] randomly.", 1);
 +
 +      // initialize optimizable parameters randomly (sampling uniformly from
 +      // that parameter's random value range)
 +      lambda = randomLambda();
 +    }
 +
 +    println("Initial lambda[]: " + lambdaToString(lambda), 1);
 +    println("", 1);
 +
 +    double FINAL_score = evalMetric.worstPossibleScore();
 +
 +
 +    // int[] lastUsedIndex = new int[numSentences];
 +    int[] maxIndex = new int[numSentences];
 +    // used to grow featVal_array dynamically
 +    // HashMap<Integer,int[]>[] suffStats_array = new HashMap[numSentences];
 +    // suffStats_array[i] maps candidates of interest for sentence i to an array
 +    // storing the sufficient statistics for that candidate
 +    for (int i = 0; i < numSentences; ++i) {
 +      // lastUsedIndex[i] = -1;
 +      maxIndex[i] = sizeOfNBest - 1;
 +      // suffStats_array[i] = new HashMap<Integer,int[]>();
 +    }
 +    /*
 +     * double[][][] featVal_array = new double[1+numParams][][]; // indexed by
 +     * [param][sentence][candidate] featVal_array[0] = null; // param indexing starts at 1 for (int
 +     * c = 1; c <= numParams; ++c) { featVal_array[c] = new double[numSentences][]; for (int i = 0;
 +     * i < numSentences; ++i) { featVal_array[c][i] = new double[maxIndex[i]]; // will grow
 +     * dynamically as needed } }
 +     */
 +    int earlyStop = 0;
 +    // number of consecutive iterations in which an early stopping criterion was satisfied
 +
 +    for (int iteration = 1;; ++iteration) {
 +
 +      double[] A = run_single_iteration(iteration, minIts, maxIts, prevIts, earlyStop, maxIndex);
 +      if (A != null) {
 +        FINAL_score = A[0];
 +        earlyStop = (int) A[1];
 +        if (A[2] == 1) break;
 +      } else {
 +        break;
 +      }
 +
 +    } // for (iteration)
 +
 +    println("", 1);
 +
 +    println("----------------------------------------------------", 1);
 +    println("Z-MERT run ended @ " + (new Date()), 1);
 +    // printMemoryUsage();
 +    println("----------------------------------------------------", 1);
 +    println("", 1);
 +    println("FINAL lambda: " + lambdaToString(lambda) + " (" + metricName_display + ": "
 +        + FINAL_score + ")", 1);
 +    // check if a lambda is outside its threshold range
 +    for (int c = 1; c <= numParams; ++c) {
 +      if (lambda[c] < minThValue[c] || lambda[c] > maxThValue[c]) {
 +        println("Warning: after normalization, lambda[" + c + "]=" + f4.format(lambda[c])
 +            + " is outside its critical value range.", 1);
 +      }
 +    }
 +    println("", 1);
 +
 +    // delete intermediate .temp.*.it* decoder output files
 +    for (int iteration = 1; iteration <= maxIts; ++iteration) {
 +      if (compressFiles == 1) {
 +        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration + ".gz");
 +        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration + ".gz");
 +        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz")) {
 +          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy.gz");
 +        } else {
 +          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz");
 +        }
 +      } else {
 +        deleteFile(tmpDirPrefix + "temp.sents.it" + iteration);
 +        deleteFile(tmpDirPrefix + "temp.feats.it" + iteration);
 +        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".copy")) {
 +          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration + ".copy");
 +        } else {
 +          deleteFile(tmpDirPrefix + "temp.stats.it" + iteration);
 +        }
 +      }
 +    }
 +
 +  } // void run_MERT(int maxIts)
 +
 +
 +  @SuppressWarnings("unchecked")
 +  public double[] run_single_iteration(int iteration, int minIts, int maxIts, int prevIts,
 +      int earlyStop, int[] maxIndex) {
 +    double FINAL_score = 0;
 +
 +    double[] retA = new double[3];
 +    // retA[0]: FINAL_score
 +    // retA[1]: earlyStop
 +    // retA[2]: should this be the last iteration?
 +
 +    boolean done = false;
 +    retA[2] = 1; // will only be made 0 if we don't break from the following loop
 +
 +
 +    double[][][] featVal_array = new double[1 + numParams][][];
 +    // indexed by [param][sentence][candidate]
 +    featVal_array[0] = null; // param indexing starts at 1
 +    for (int c = 1; c <= numParams; ++c) {
 +      featVal_array[c] = new double[numSentences][];
 +      for (int i = 0; i < numSentences; ++i) {
 +        featVal_array[c][i] = new double[maxIndex[i] + 1];
 +        // will grow dynamically as needed
 +      }
 +    }
 +
 +
 +    while (!done) { // NOTE: this "loop" will only be carried out once
 +      println("--- Starting Z-MERT iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
 +
 +      // printMemoryUsage();
 +
 +      // run the decoder on all the sentences, producing for each sentence a set of
 +      // sizeOfNBest candidates, with numParams feature values for each candidate
 +
 +      /******************************/
 +      // CREATE DECODER CONFIG FILE //
 +      /******************************/
 +
 +      createConfigFile(lambda, decoderConfigFileName, decoderConfigFileName + ".ZMERT.orig");
 +      // i.e. use the original config file as a template
 +
 +      /***************/
 +      // RUN DECODER //
 +      /***************/
 +
 +      if (iteration == 1) {
 +        println("Decoding using initial weight vector " + lambdaToString(lambda), 1);
 +      } else {
 +        println("Redecoding using weight vector " + lambdaToString(lambda), 1);
 +      }
 +
 +      String[] decRunResult = run_decoder(iteration); // iteration passed in case fake decoder will
 +                                                      // be used
 +      // [0] name of file to be processed
 +      // [1] indicates how the output file was obtained:
 +      // 1: external decoder
 +      // 2: fake decoder
 +      // 3: internal decoder
 +
 +      if (!decRunResult[1].equals("2")) {
 +        println("...finished decoding @ " + (new Date()), 1);
 +      }
 +
 +      checkFile(decRunResult[0]);
 +
 +      println("Producing temp files for iteration " + iteration, 3);
 +
 +      produceTempFiles(decRunResult[0], iteration);
 +
 +      if (saveInterFiles == 1 || saveInterFiles == 3) { // make copy of intermediate config file
 +        if (!copyFile(decoderConfigFileName, decoderConfigFileName + ".ZMERT.it" + iteration)) {
 +          println("Warning: attempt to make copy of decoder config file (to create "
 +              + decoderConfigFileName + ".ZMERT.it" + iteration + ") was unsuccessful!", 1);
 +        }
 +      }
 +      if (saveInterFiles == 2 || saveInterFiles == 3) { // make copy of intermediate decoder output
 +                                                        // file...
 +
 +        if (!decRunResult[1].equals("2")) { // ...but only if no fake decoder
 +          if (!decRunResult[0].endsWith(".gz")) {
 +            if (!copyFile(decRunResult[0], decRunResult[0] + ".ZMERT.it" + iteration)) {
 +              println("Warning: attempt to make copy of decoder output file (to create "
 +                  + decRunResult[0] + ".ZMERT.it" + iteration + ") was unsuccessful!", 1);
 +            }
 +          } else {
 +            String prefix = decRunResult[0].substring(0, decRunResult[0].length() - 3);
 +            if (!copyFile(prefix + ".gz", prefix + ".ZMERT.it" + iteration + ".gz")) {
 +              println("Warning: attempt to make copy of decoder output file (to create " + prefix
 +                  + ".ZMERT.it" + iteration + ".gz" + ") was unsuccessful!", 1);
 +            }
 +          }
 +
 +          if (compressFiles == 1 && !decRunResult[0].endsWith(".gz")) {
 +            gzipFile(decRunResult[0] + ".ZMERT.it" + iteration);
 +          }
 +        } // if (!fake)
 +
 +      }
 +
 +      int[] candCount = new int[numSentences];
 +      int[] lastUsedIndex = new int[numSentences];
 +      ConcurrentHashMap<Integer, int[]>[] suffStats_array = new ConcurrentHashMap[numSentences];
 +      for (int i = 0; i < numSentences; ++i) {
 +        candCount[i] = 0;
 +        lastUsedIndex[i] = -1;
 +        // suffStats_array[i].clear();
 +        suffStats_array[i] = new ConcurrentHashMap<Integer, int[]>();
 +      }
 +
 +      double[][] initialLambda = new double[1 + initsPerIt][1 + numParams];
 +      // the intermediate "initial" lambdas
 +      double[][] finalLambda = new double[1 + initsPerIt][1 + numParams];
 +      // the intermediate "final" lambdas
 +
 +      // set initialLambda[][]
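 +      // initialLambda[1] is the current weight vector; the remaining initsPerIt - 1 starting
 +      // points are either drawn at random or obtained by perturbing it, per damianos_method.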
 +      System.arraycopy(lambda, 1, initialLambda[1], 1, numParams);
 +      for (int j = 2; j <= initsPerIt; ++j) {
 +        if (damianos_method == 0) {
 +          initialLambda[j] = randomLambda();
 +        } else {
 +          initialLambda[j] =
 +              randomPerturbation(initialLambda[1], iteration, damianos_method, damianos_param,
 +                  damianos_mult);
 +        }
 +      }
 +
 +//      double[] initialScore = new double[1 + initsPerIt];
 +      double[] finalScore = new double[1 + initsPerIt];
 +
 +      int[][][] best1Cand_suffStats = new int[1 + initsPerIt][numSentences][suffStatsCount];
 +      double[][] best1Score = new double[1 + initsPerIt][numSentences];
 +      // Those two arrays are used to calculate initialScore[]
 +      // (the "score" in best1Score refers to that assigned by the
 +      // decoder; the "score" in initialScore refers to that
 +      // assigned by the evaluation metric)
 +
 +      int firstIt = Math.max(1, iteration - prevIts);
 +      // i.e. only process candidates from the current iteration and candidates
 +      // from up to prevIts previous iterations.
 +      println("Reading candidate translations from iterations " + firstIt + "-" + iteration, 1);
 +      println("(and computing " + metricName
 +          + " sufficient statistics for previously unseen candidates)", 1);
 +      print("  Progress: ");
 +
 +      int[] newCandidatesAdded = new int[1 + iteration];
 +      for (int it = 1; it <= iteration; ++it) {
 +        newCandidatesAdded[it] = 0;
 +      }
 +
 +
 +
 +      try {
 +
 +        // each inFile corresponds to the output of an iteration
 +        // (index 0 is not used; no corresponding index for the current iteration)
 +        BufferedReader[] inFile_sents = new BufferedReader[iteration];
 +        BufferedReader[] inFile_feats = new BufferedReader[iteration];
 +        BufferedReader[] inFile_stats = new BufferedReader[iteration];
 +
 +        for (int it = firstIt; it < iteration; ++it) {
 +          InputStream inStream_sents, inStream_feats, inStream_stats;
 +          if (compressFiles == 0) {
 +            inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
 +            inStream_feats = new FileInputStream(tmpDirPrefix + "temp.feats.it" + it);
 +            inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
 +          } else {
 +            inStream_sents =
 +                new GZIPInputStream(
 +                    new FileInputStream(tmpDirPrefix + "temp.sents.it" + it + ".gz"));
 +            inStream_feats =
 +                new GZIPInputStream(
 +                    new FileInputStream(tmpDirPrefix + "temp.feats.it" + it + ".gz"));
 +            inStream_stats =
 +                new GZIPInputStream(
 +                    new FileInputStream(tmpDirPrefix + "temp.stats.it" + it + ".gz"));
 +          }
 +
 +          inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
 +          inFile_feats[it] = new BufferedReader(new InputStreamReader(inStream_feats, "utf8"));
 +          inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
 +        }
 +
 +
 +        InputStream inStream_sentsCurrIt, inStream_featsCurrIt, inStream_statsCurrIt;
 +        if (compressFiles == 0) {
 +          inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
 +          inStream_featsCurrIt = new FileInputStream(tmpDirPrefix + "temp.feats.it" + iteration);
 +        } else {
 +          inStream_sentsCurrIt =
 +              new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration
 +                  + ".gz"));
 +          inStream_featsCurrIt =
 +              new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.feats.it" + iteration
 +                  + ".gz"));
 +        }
 +
 +        BufferedReader inFile_sentsCurrIt =
 +            new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
 +        BufferedReader inFile_featsCurrIt =
 +            new BufferedReader(new InputStreamReader(inStream_featsCurrIt, "utf8"));
 +
 +        BufferedReader inFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below
 +                                                  // is set to true
 +        PrintWriter outFile_statsCurrIt = null; // will only be used if statsCurrIt_exists below is
 +                                                // set to false
 +        boolean statsCurrIt_exists = false;
 +        if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration)) {
 +          inStream_statsCurrIt = new FileInputStream(tmpDirPrefix + "temp.stats.it" + iteration);
 +          inFile_statsCurrIt =
 +              new BufferedReader(new InputStreamReader(inStream_statsCurrIt, "utf8"));
 +          statsCurrIt_exists = true;
 +          copyFile(tmpDirPrefix + "temp.stats.it" + iteration, tmpDirPrefix + "temp.stats.it"
 +              + iteration + ".copy");
 +        } else if (fileExists(tmpDirPrefix + "temp.stats.it" + iteration + ".gz")) {
 +          inStream_statsCurrIt =
 +              new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.stats.it" + iteration
 +                  + ".gz"));
 +          inFile_statsCurrIt =
 +              new BufferedReader(new InputStreamReader(inStream_statsCurrIt, "utf8"));
 +          statsCurrIt_exists = true;
 +          copyFile(tmpDirPrefix + "temp.stats.it" + iteration + ".gz", tmpDirPrefix
 +              + "temp.stats.it" + iteration + ".copy.gz");
 +        } else {
 +          outFile_statsCurrIt = new PrintWriter(tmpDirPrefix + "temp.stats.it" + iteration);
 +        }
 +
 +        PrintWriter outFile_statsMerged = new PrintWriter(tmpDirPrefix + "temp.stats.merged");
 +        // write sufficient statistics from all the sentences
 +        // from the output files into a single file
 +        PrintWriter outFile_statsMergedKnown =
 +            new PrintWriter(tmpDirPrefix + "temp.stats.mergedKnown");
 +        // write sufficient statistics from all the sentences
 +        // from the output files into a single file
 +
 +        FileOutputStream outStream_unknownCands =
 +            new FileOutputStream(tmpDirPrefix + "temp.currIt.unknownCands", false);
 +        OutputStreamWriter outStreamWriter_unknownCands =
 +            new OutputStreamWriter(outStream_unknownCands, "utf8");
 +        BufferedWriter outFile_unknownCands = new BufferedWriter(outStreamWriter_unknownCands);
 +
 +        PrintWriter outFile_unknownIndices =
 +            new PrintWriter(tmpDirPrefix + "temp.currIt.unknownIndices");
 +
 +
 +        String sents_str, feats_str, stats_str;
 +
 +        // BUG: this assumes a candidate string cannot be produced for two
 +        // different source sentences, which is not necessarily true
 +        // (It's not actually a bug, but only because existingCandStats gets
 +        // cleared before moving to the next source sentence.)
 +        // FIX: should be made an array, indexed by i
 +        HashMap<String, String> existingCandStats = new HashMap<String, String>();
 +        // Stores precalculated sufficient statistics for candidates, in case
 +        // the same candidate is seen again. (SS stored as a String.)
 +        // Q: Why do we care? If we see the same candidate again, aren't we going
 +        // to ignore it? So, why do we care about the SS of this repeat candidate?
 +        // A: A "repeat" candidate may not be a repeat candidate in later
 +        // iterations if the user specifies a value for prevMERTIterations
 +        // that causes MERT to skip candidates from early iterations.
 +        double[] currFeatVal = new double[1 + numParams];
 +        String[] featVal_str;
 +
 +        int totalCandidateCount = 0;
 +
 +
 +
 +        int[] sizeUnknown_currIt = new int[numSentences];
 +
 +
 +
 +        for (int i = 0; i < numSentences; ++i) {
 +
 +          for (int j = 1; j <= initsPerIt; ++j) {
 +            best1Score[j][i] = NegInf;
 +          }
 +
 +          for (int it = firstIt; it < iteration; ++it) {
 +            // Why up to but *excluding* iteration?
 +            // Because the last iteration is handled a little differently, since
 +            // the SS must be calculated (and the corresponding file created),
 +            // which is not true for previous iterations.
 +
 +            for (int n = 0; n <= sizeOfNBest; ++n) {
 +              // Why up to and *including* sizeOfNBest?
 +              // So that it would read the "||||||" separator even if there is
 +              // a complete list of sizeOfNBest candidates.
 +
 +              // for the nth candidate for the ith sentence, read the sentence, feature values,
 +              // and sufficient statistics from the various temp files
 +
 +              sents_str = inFile_sents[it].readLine();
 +              feats_str = inFile_feats[it].readLine();
 +              stats_str = inFile_stats[it].readLine();
 +
 +              if (sents_str.equals("||||||")) {
 +                n = sizeOfNBest + 1;
 +              } else if (!existingCandStats.containsKey(sents_str)) {
 +
 +                outFile_statsMergedKnown.println(stats_str);
 +
 +                featVal_str = feats_str.split("\\s+");
 +
 +                /* Sparse (labeled) feature version */
 +                if (feats_str.indexOf('=') != -1) {
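 +                  // Labeled features are whitespace-separated name=value pairs (e.g. "lm_0=-42.1");
 +                  // each name is mapped back to its parameter index via c_fromParamName().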
 +                  for (String featurePair: featVal_str) {
 +                    String[] pair = featurePair.split("=");
 +                    String name = pair[0];
 +                    Double value = Double.parseDouble(pair[1]);
 +                    currFeatVal[c_fromParamName(name)] = value;
 +                  }
 +                } else {
 +                  for (int c = 1; c <= numParams; ++c) {
 +                    try {
 +                      currFeatVal[c] = Double.parseDouble(featVal_str[c - 1]);
 +                    } catch (Exception e) {
 +                      currFeatVal[c] = 0.0;
 +                    }
 +                  // print("fV[" + c + "]=" + currFeatVal[c] + " ",4);
 +                  }
 +                // println("",4);
 +                }
 +
 +
 +                for (int j = 1; j <= initsPerIt; ++j) {
 +                  double score = 0; // i.e. score assigned by decoder
 +                  for (int c = 1; c <= numParams; ++c) {
 +                    score += initialLambda[j][c] * currFeatVal[c];
 +                  }
 +                  if (score > best1Score[j][i]) {
 +                    best1Score[j][i] = score;
 +                    String[] tempStats = stats_str.split("\\s+");
 +                    for (int s = 0; s < suffStatsCount; ++s)
 +                      best1Cand_suffStats[j][i][s] = Integer.parseInt(tempStats[s]);
 +                  }
 +                } // for (j)
 +
 +                existingCandStats.put(sents_str, stats_str);
 +
 +                setFeats(featVal_array, i, lastUsedIndex, maxIndex, currFeatVal);
 +                candCount[i] += 1;
 +
 +                newCandidatesAdded[it] += 1;
 +
 +              } // if unseen candidate
 +
 +            } // for (n)
 +
 +          } // for (it)
 +
 +          outFile_statsMergedKnown.println("||||||");
 +
 +
 +          // now process the candidates of the current iteration
 +          // now determine the new candidates of the current iteration
 +
 +          /*
 +           * remember: BufferedReader inFile_sentsCurrIt BufferedReader inFile_featsCurrIt
 +           * PrintWriter outFile_statsCurrIt
 +           */
 +
 +          String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
 +
 +          Vector<String> unknownCands_V = new Vector<String>();
 +          // which candidates (of the i'th source sentence) have not been seen before
 +          // this iteration?
 +
 +          for (int n = 0; n <= sizeOfNBest; ++n) {
 +            // Why up to and *including* sizeOfNBest?
 +            // So that it would read the "||||||" separator even if there is
 +            // a complete list of sizeOfNBest candidates.
 +
 +            // for the nth candidate for the ith sentence, read the sentence,
 +            // and store it in the sentsCurrIt_currSrcSent array
 +
 +            sents_str = inFile_sentsCurrIt.readLine();
 +            sentsCurrIt_currSrcSent[n] = sents_str; // Note: possibly "||||||"
 +
 +            if (sents_str.equals("||||||")) {
 +              n = sizeOfNBest + 1;
 +            } else if (!existingCandStats.containsKey(sents_str)) {
 +              unknownCands_V.add(sents_str);
 +              writeLine(sents_str, outFile_unknownCands);
 +              outFile_unknownIndices.println(i);
 +              newCandidatesAdded[iteration] += 1;
 +              existingCandStats.put(sents_str, "U"); // i.e. unknown
 +              // we add sents_str to avoid duplicate entries in unknownCands_V
 +            }
 +
 +          } // for (n)
 +
 +
 +
 +          // now unknownCands_V has the candidates for which we need to calculate
 +          // sufficient statistics (for the i'th source sentence)
 +          int sizeUnknown = unknownCands_V.size();
 +          sizeUnknown_currIt[i] = sizeUnknown;
 +
 +          /*********************************************/
 +          /*
 +           * String[] unknownCands = new String[sizeUnknown]; unknownCands_V.toArray(unknownCands);
 +           * int[] indices = new int[sizeUnknown]; for (int d = 0; d < sizeUnknown; ++d) {
 +           * existingCandStats.remove(unknownCands[d]); // remove the (unknownCands[d],"U") entry
 +           * from existingCandStats // (we had added it while constructing unknownCands_V to avoid
 +           * duplicate entries) indices[d] = i; }
 +           */
 +          /*********************************************/
 +
 +          existingCandStats.clear();
 +
 +        } // for (i)
 +
 +        /*
 +         * int[][] newSuffStats = null; if (!statsCurrIt_exists && sizeUnknown > 0) { newSuffStats =
 +         * evalMetric.suffStats(unknownCands, indices); }
 +         */
 +
 +        outFile_statsMergedKnown.close();
 +        outFile_unknownCands.close();
 +        outFile_unknownIndices.close();
 +
 +
 +        for (int it = firstIt; it < iteration; ++it) {
 +          inFile_sents[it].close();
 +          inFile_stats[it].close();
 +
 +          InputStream inStream_sents, inStream_stats;
 +          if (compressFiles == 0) {
 +            inStream_sents = new FileInputStream(tmpDirPrefix + "temp.sents.it" + it);
 +            inStream_stats = new FileInputStream(tmpDirPrefix + "temp.stats.it" + it);
 +          } else {
 +            inStream_sents =
 +                new GZIPInputStream(
 +                    new FileInputStream(tmpDirPrefix + "temp.sents.it" + it + ".gz"));
 +            inStream_stats =
 +                new GZIPInputStream(
 +                    new FileInputStream(tmpDirPrefix + "temp.stats.it" + it + ".gz"));
 +          }
 +
 +          inFile_sents[it] = new BufferedReader(new InputStreamReader(inStream_sents, "utf8"));
 +          inFile_stats[it] = new BufferedReader(new InputStreamReader(inStream_stats, "utf8"));
 +        }
 +
 +        inFile_sentsCurrIt.close();
 +        if (compressFiles == 0) {
 +          inStream_sentsCurrIt = new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration);
 +        } else {
 +          inStream_sentsCurrIt =
 +              new GZIPInputStream(new FileInputStream(tmpDirPrefix + "temp.sents.it" + iteration
 +                  + ".gz"));
 +        }
 +        inFile_sentsCurrIt =
 +            new BufferedReader(new InputStreamReader(inStream_sentsCurrIt, "utf8"));
 +
 +
 +
 +        // calculate SS for unseen candidates and write them to file
 +        FileInputStream inStream_statsCurrIt_unknown = null;
 +        BufferedReader inFile_statsCurrIt_unknown = null;
 +
 +        if (!statsCurrIt_exists && newCandidatesAdded[iteration] > 0) {
 +          // create the file...
 +          evalMetric.createSuffStatsFile(tmpDirPrefix + "temp.currIt.unknownCands", tmpDirPrefix
 +              + "temp.currIt.unknownIndices", tmpDirPrefix + "temp.stats.unknown", sizeOfNBest);
 +
 +          // ...and open it
 +          inStream_statsCurrIt_unknown = new FileInputStream(tmpDirPrefix + "temp.stats.unknown");
 +          inFile_statsCurrIt_unknown =
 +              new BufferedReader(new InputStreamReader(inStream_statsCurrIt_unknown, "utf8"));
 +        }
 +
 +        // OPEN mergedKnown file
 +        FileInputStream instream_statsMergedKnown =
 +            new FileInputStream(tmpDirPrefix + "temp.stats.mergedKnown");
 +        BufferedReader inFile_statsMergedKnown =
 +            new BufferedReader(new InputStreamReader(instream_statsMergedKnown, "utf8"));
 +
 +        for (int i = 0; i < numSentences; ++i) {
 +
 +          // reprocess candidates from previous iterations
 +          for (int it = firstIt; it < iteration; ++it) {
 +            for (int n = 0; n <= sizeOfNBest; ++n) {
 +
 +              sents_str = inFile_sents[it].readLine();
 +              stats_str = inFile_stats[it].readLine();
 +
 +              if (sents_str.equals("||||||")) {
 +                n = sizeOfNBest + 1;
 +              } else if (!existingCandStats.containsKey(sents_str)) {
 +                existingCandStats.put(sents_str, stats_str);
 +              } // if unseen candidate
 +
 +            } // for (n)
 +          } // for (it)
 +
 +          // copy relevant portion from mergedKnown to the merged file
 +          String line_mergedKnown = inFile_statsMergedKnown.readLine();
 +          while (!line_mergedKnown.equals("||||||")) {
 +            outFile_statsMerged.println(line_mergedKnown);
 +            line_mergedKnown = inFile_statsMergedKnown.readLine();
 +          }
 +
 +          int[] stats = new int[suffStatsCount];
 +
 +          for (int n = 0; n <= sizeOfNBest; ++n) {
 +            // Why up to and *including* sizeOfNBest?
 +            // So that it would read the "||||||" separator even if there is
 +            // a complete list of sizeOfNBest candidates.
 +
 +            // for the nth candidate for the ith sentence, read the sentence, feature values,
 +            // and sufficient statistics from the various temp files
 +
 +            sents_str = inFile_sentsCurrIt.readLine();
 +            feats_str = inFile_featsCurrIt.readLine();
 +
 +            if (sents_str.equals("||||||")) {
 +              n = sizeOfNBest + 1;
 +            } else if (!existingCandStats.containsKey(sents_str)) {
 +
 +              if (!statsCurrIt_exists) {
 +                stats_str = inFile_statsCurrIt_unknown.readLine();
 +
 +                String[] temp_stats = stats_str.split("\\s+");
 +                for (int s = 0; s < suffStatsCount; ++s) {
 +                  stats[s] = Integer.parseInt(temp_stats[s]);
 +                }
 +
 +                /*
 +                 * stats_str = ""; for (int s = 0; s < suffStatsCount-1; ++s) { stats[s] =
 +                 * newSuffStats[d][s]; stats_str += (stats[s] + " "); } stats[suffStatsCount-1] =
 +                 * newSuffStats[d][suffStatsCount-1]; stats_str += stats[suffStatsCount-1];
 +                 */
 +
 +                outFile_statsCurrIt.println(stats_str);
 +              } else {
 +                stats_str = inFile_statsCurrIt.readLine();
 +                String[] temp_stats = stats_str.split("\\s+");
 +                for (int s = 0; s < suffStatsCount; ++s) {
 +                  try {
 +                    stats[s] = Integer.parseInt(temp_stats[s]);
 +                  } catch (Exception e) {
 +                    stats[s] = 0;
 +                  }
 +                }
 +              }
 +
 +              outFile_statsMerged.println(stats_str);
 +
 +              featVal_str = feats_str.split("\\s+");
 +
 +              if (feats_str.indexOf('=') != -1) {
 +                for (String featurePair: featVal_str) {
 +                  String[] pair = featurePair.split("=");
 +                  String name = pair[0];
 +                  Double value = Double.parseDouble(pair[1]);
 +                  currFeatVal[c_fromParamName(name)] = value;
 +                }
 +              } else {
 +                for (int c = 1; c <= numParams; ++c) {
 +                  try {
 +                    currFeatVal[c] = Double.parseDouble(featVal_str[c - 1]);
 +                  } catch (Exception e) {
 +                    // NumberFormatException, ArrayIndexOutOfBoundsException
 +                    currFeatVal[c] = 0.0;
 +                  }
 +
 +                // print("fV[" + c + "]=" + currFeatVal[c] + " ",4);
 +                }
 +              }
 +              // println("",4);
 +
 +
 +              for (int j = 1; j <= initsPerIt; ++j) {
 +                double score = 0; // i.e. score assigned by decoder
 +                for (int c = 1; c <= numParams; ++c) {
 +                  score += initialLambda[j][c] * currFeatVal[c];
 +                }
 +                if (score > best1Score[j][i]) {
 +                  best1Score[j][i] = score;
 +                  for (int s = 0; s < suffStatsCount; ++s)
 +                    best1Cand_suffStats[j][i][s] = stats[s];
 +                }
 +              } // for (j)
 +
 +              existingCandStats.put(sents_str, stats_str);
 +
 +              setFeats(featVal_array, i, lastUsedIndex, maxIndex, currFeatVal);
 +              candCount[i] += 1;
 +
 +              // newCandidatesAdded[iteration] += 1;
 +              // moved to code above detecting new candidates
 +
 +            } else {
 +              if (statsCurrIt_exists)
 +                inFile_statsCurrIt.readLine();
 +              else {
 +                // write SS to outFile_statsCurrIt
 +                stats_str = existingCandStats.get(sents_str);
 +                outFile_statsCurrIt.println(stats_str);
 +              }
 +            }
 +
 +          } // for (n)
 +
 +          // now d = sizeUnknown_currIt[i] - 1
 +
 +          if (statsCurrIt_exists)
 +            inFile_statsCurrIt.readLine();
 +          else
 +            outFile_statsCurrIt.println("||||||");
 +
 +          existingCandStats.clear();
 +          totalCandidateCount += candCount[i];
 +
 +          if ((i + 1) % 500 == 0) {
 +            print((i + 1) + "\n" + "            ", 1);
 +          } else if ((i + 1) % 100 == 0) {
 +            print("+", 1);
 +          } else if ((i + 1) % 25 == 0) {
 +            print(".", 1);
 +          }
 +
 +        } // for (i)
 +
 +        inFile_statsMergedKnown.close();
 +        outFile_statsMerged.close();
 +
 +        println("", 1); // finish progress line
 +
 +        for (int it = firstIt; it < iteration; ++it) {
 +          inFile_sents[it].close();
 +          inFile_feats[it].close();
 +          inFile_stats[it].close();
 +        }
 +
 +        inFile_sentsCurrIt.close();
 +        inFile_featsCurrIt.close();
 +        if (statsCurrIt_exists)
 +          inFile_statsCurrIt.close();
 +        else
 +          outFile_statsCurrIt.close();
 +
 +        if (compressFiles == 1 && !statsCurrIt_exists) {
 +          gzipFile(tmpDirPrefix + "temp.stats.it" + iteration);
 +        }
 +
 +        deleteFile(tmpDirPrefix + "temp.currIt.unknownCands");
 +        deleteFile(tmpDirPrefix + "temp.currIt.unknownIndices");
 +        deleteFile(tmpDirPrefix + "temp.stats.unknown");
 +        deleteFile(tmpDirPrefix + "temp.stats.mergedKnown");
 +
 +        // cleanupMemory();
 +
 +        println("Processed " + totalCandidateCount + " distinct candidates " + "(about "
 +            + totalCandidateCount / numSentences + " per sentence):", 1);
 +        for (int it = firstIt; it <= iteration; ++it) {
 +          println("newCandidatesAdded[it=" + it + "] = " + newCandidatesAdded[it] + " (about "
 +              + newCandidatesAdded[it] / numSentences + " per sentence)", 1);
 +        }
 +
 +        println("", 1);
 +
 +      } catch (IOException e) {
 +        throw new RuntimeException(e);
 +      }
 +
 +
 +      if (newCandidatesAdded[iteration] == 0) {
 +        if (!oneModificationPerIteration) {
 +          println("No new candidates added in this iteration; exiting Z-MERT.", 1);
 +          println("", 1);
 +          println("---  Z-MERT iteration #" + iteration + " ending @ " + (new Date()) + "  ---", 1);
 +          println("", 1);
 +          return null; // THIS MEANS THAT THE OLD VALUES SHOULD BE KEPT BY THE CALLER
 +        } else {
 +          println("Note: No new candidates added in this iteration.", 1);
 +        }
 +      }
 +
 +      // run the initsPerIt optimizations, in parallel, across numOptThreads threads
 +      ExecutorService pool = Executors.newFixedThreadPool(numOptThreads);
 +      Semaphore blocker = new Semaphore(0);
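 +      // Each IntermediateOptimizer is expected to release one permit on blocker when it finishes,
 +      // so the acquire(initsPerIt) call below waits for all initsPerIt optimizers to complete.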
 +      Vector<String>[] threadOutput = new Vector[initsPerIt + 1];
 +
 +      for (int j = 1; j <= initsPerIt; ++j) {
 +        threadOutput[j] = new Vector<String>();
 +        pool.execute(new IntermediateOptimizer(j, blocker, threadOutput[j], initialLambda[j],
 +            finalLambda[j], best1Cand_suffStats[j], finalScore, candCount, featVal_array,
 +            suffStats_array));
 +      }
 +
 +      pool.shutdown();
 +
 +      try {
 +        blocker.acquire(initsPerIt);
 +      } catch (java.lang.InterruptedException e) {
 +        throw new RuntimeException(e);
 +      }
 +
 +      // extract output from threadOutput[]
 +      for (int j = 1; j <= initsPerIt; ++j) {
 +        for (String str : threadOutput[j]) {
 +          println(str); // no verbosity check needed; thread already checked
 +        }
 +      }
 +
 +      int best_j = 1;
 +      double bestFinalScore = finalScore[1];
 +      for (int j = 2; j <= initsPerIt; ++j) {
 +        if (evalMetric.isBetter(finalScore[j], bestFinalScore)) {
 +          best_j = j;
 +          bestFinalScore = finalScore[j];
 +        }
 +      }
 +
 +      if (initsPerIt > 1) {
 +        println("Best final lambda is lambda[j=" + best_j + "] " + "(" + metricName_display + ": "
 +            + f4.format(bestFinalScore) + ").", 1);
 +        println("", 1);
 +      }
 +
 +      FINAL_score = bestFinalScore;
 +
 +      boolean anyParamChanged = false;
 +      boolean anyParamChangedSignificantly = false;
 +
 +      for (int c = 1; c <= numParams; ++c) {
 +        if (finalLambda[best_j][c] != lambda[c]) {
 +          anyParamChanged = true;
 +        }
 +        if (Math.abs(finalLambda[best_j][c] - lambda[c]) > stopSigValue) {
 +          anyParamChangedSignificantly = true;
 +        }
 +      }
 +
 +      System.arraycopy(finalLambda[best_j], 1, lambda, 1, numParams);
 +      println("---  Z-MERT iteration #" + iteration + " ending @ " + (new Date()) + "  ---", 1);
 +      println("", 1);
 +
 +      if (!anyParamChanged) {
 +        println("No parameter value changed in this iteration; exiting Z-MERT.", 1);
 +        println("", 1);
 +        break; // exit for (iteration) loop preemptively
 +      }
 +
 +      // check if a lambda is outside its threshold range
 +      for (int c = 1; c <= numParams; ++c) {
 +        if (lambda[c] < minThValue[c] || lambda[c] > maxThValue[c]) {
 +          println("Warning: after normalization, lambda[" + c + "]=" + f4.format(lambda[c])
 +              + " is outside its critical value range.", 1);
 +        }
 +      }
 +
 +      // was an early stopping criterion satisfied?
 +      boolean critSatisfied = false;
 +      if (!anyParamChangedSignificantly && stopSigValue >= 0) {
 +        println("Note: No parameter value changed significantly " + "(i.e. by more than "
 +            + stopSigValue + ") in this iteration.", 1);
 +        critSatisfied = true;
 +      }
 +
 +      if (critSatisfied) {
 +        ++earlyStop;
 +        println("", 1);
 +      } else {
 +        earlyStop = 0;
 +      }
 +
 +      // if min number of iterations executed, investigate if early exit should happen
 +      if (iteration >= minIts && earlyStop >= stopMinIts) {
 +        println("Some early stopping criteria has been observed " + "in " + stopMinIts
 +            + " consecutive iterations; exiting Z-MERT.", 1);
 +        println("", 1);
 +        break; // exit for (iteration) loop preemptively
 +      }
 +
 +      // if max number of iterations executed, exit
 +      if (iteration >= maxIts) {
 +        println("Maximum number of MERT iterations reached; exiting Z-MERT.", 1);
 +        println("", 1);
 +        break; // exit for (iteration) loop
 +      }
 +
 +      println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
 +      println("", 1);
 +
 +      // printMemoryUsage();
 +      for (int i = 0; i < numSentences; ++i) {
 +        suffStats_array[i].clear();
 +      }
 +      // cleanupMemory();
 +      // println("",2);
 +
 +
 +      retA[2] = 0; // i.e. this should NOT be the last iteration
 +      done = true;
 +
 +    } // while (!done) // NOTE: this "loop" will only be carried out once
 +
 +
 +    // delete the temp.stats.merged file, since it is not needed in the next
 +    // iteration (it will be recreated from scratch)
 +    deleteFile(tmpDirPrefix + "temp.stats.merged");
 +
 +    retA[0] = FINAL_score;
 +    retA[1] = earlyStop;
 +    return retA;
 +
 +  } // run_single_iteration
 +
 +  private String lambdaToString(double[] lambdaA) {
 +    String retStr = "{";
 +    for (int c = 1; c <= numParams - 1; ++c) {
 +      retStr += "" + lambdaA[c] + ", ";
 +    }
 +    retStr += "" + lambdaA[numParams] + "}";
 +
 +    return retStr;
 +  }
 +
 +  private String[] run_decoder(int iteration) {
 +    String[] retSA = new String[2];
 +    // [0] name of file to be processed
 +    // [1] indicates how the output file was obtained:
 +    // 1: external decoder
 +    // 2: fake decoder
 +    // 3: internal decoder
 +
 +    if (fakeFileNameTemplate != null
 +        && fileExists(fakeFileNamePrefix + iteration + fakeFileNameSuffix)) {
 +      String fakeFileName = fakeFileNamePrefix + iteration + fakeFileNameSuffix;
 +      println("Not running decoder; using " + fakeFileName + " instead.", 1);
 +      /*
 +       * if (fakeFileName.endsWith(".gz")) { copyFile(fakeFileName,decoderOutFileName+".gz");
 +       * gunzipFile(decoderOutFileName+".gz"); } else { copyFile(fakeFileName,decoderOutFileName); }
 +       */
 +      retSA[0] = fakeFileName;
 +      retSA[1] = "2";
 +
 +    } else {
 +      println("Running external decoder...", 1);
 +
 +      try {
 +        ArrayList<String> cmd = new ArrayList<String>();
 +        cmd.add(decoderCommandFileName);
 +
 +        if (passIterationToDecoder)
 +          cmd.add(Integer.toString(iteration));
 +
 +        ProcessBuilder pb = new ProcessBuilder(cmd);
 +        // this merges the error and output streams of the subprocess
 +        pb.redirectErrorStream(true);
 +        Process p = pb.start();
 +
 +        // capture the sub-command's output
 +        StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), decVerbosity);
 +        outputGobbler.start();
 +
 +        int decStatus = p.waitFor();
 +        if (decStatus != validDecoderExitValue) {
 +          throw new RuntimeException("Call to decoder returned " + decStatus + "; was expecting "
 +              + validDecoderExitValue + ".");
 +        }
 +      } catch (IOException | InterruptedException e) {
 +        throw new RuntimeException(e);
 +      }
 +
 +      retSA[0] = decoderOutFileName;
 +      retSA[1] = "1";
 +    }
 +
 +    return retSA;
 +
 +  }
 +
 +  private void produceTempFiles(String nbestFileName, int iteration) {
 +    try {
 +      String sentsFileName = tmpDirPrefix + "temp.sents.it" + iteration;
 +      String featsFileName = tmpDirPrefix + "temp.feats.it" + iteration;
 +
 +      FileOutputStream outStream_sents = new FileOutputStream(sentsFileName, false);
 +      OutputStreamWriter outStreamWriter_sents = new OutputStreamWriter(outStream_sents, "utf8");
 +      BufferedWriter outFile_sents = new BufferedWriter(outStreamWriter_sents);
 +
 +      PrintWriter outFile_feats = new PrintWriter(featsFileName);
 +
 +
 +      InputStream inStream_nbest = null;
 +      if (nbestFileName.endsWith(".gz")) {
 +        inStream_nbest = new GZIPInputStream(new FileInputStream(nbestFileName));
 +      } else {
 +        inStream_nbest = new FileInputStream(nbestFileName);
 +      }
 +      BufferedReader inFile_nbest =
 +          new BufferedReader(new InputStreamReader(inStream_nbest, "utf8"));
 +
 +      String line; // , prevLine;
 +      String candidate_str = "";
 +      String feats_str = "";
 +
 +      int i = 0;
 +      int n = 0;
 +      line = inFile_nbest.readLine();
 +
 +      while (line != null) {
 +
 +        // skip blank lines and lines that aren't formatted correctly
 +        // (read the next line before continuing, so such lines cannot cause an infinite loop)
 +        if (line.equals("") || line.indexOf("|||") == -1) {
 +          line = inFile_nbest.readLine();
 +          continue;
 +        }
 +
 +        /*
 +         * line format:
-          * 
++         *
 +         * i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val
 +         * .*
-          * 
++         *
 +         * Updated September 2012: features can now be named (for sparse feature compatibility).
 +         * You must name all features or none of them.
 +         */
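 +        // A hypothetical example of two such lines (feature names and values are purely
 +        // illustrative, not taken from a real run):
 +        //   0 ||| the house is small . ||| lm=-12.48 tm_pt_0=-3.16 WordPenalty=-2.17
 +        //   0 ||| the house is little . ||| lm=-13.02 tm_pt_0=-2.98 WordPenalty=-2.17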
 +
 +        // in a well formed file, we'd find the nth candidate for the ith sentence
 +
 +        int read_i = Integer.parseInt((line.substring(0, line.indexOf("|||"))).trim());
 +
 +        if (read_i != i) {
 +          writeLine("||||||", outFile_sents);
 +          outFile_feats.println("||||||");
 +          n = 0;
 +          ++i;
 +        }
 +
 +        line = (line.substring(line.indexOf("|||") + 3)).trim(); // get rid of initial text
 +
 +        candidate_str = (line.substring(0, line.indexOf("|||"))).trim();
 +        feats_str = (line.substring(line.indexOf("|||") + 3)).trim();
 +        // get rid of candidate string
 +
 +        int junk_i = feats_str.indexOf("|||");
 +        if (junk_i >= 0) {
 +          feats_str = (feats_str.substring(0, junk_i)).trim();
 +        }
 +
 +        writeLine(normalize(candidate_str, textNormMethod), outFile_sents);
 +        outFile_feats.println(feats_str);
 +
 +        ++n;
 +        if (n == sizeOfNBest) {
 +          writeLine("||||||", outFile_sents);
 +          outFile_feats.println("||||||");
 +          n = 0;
 +          ++i;
 +        }
 +
 +        line = inFile_nbest.readLine();
 +      }
 +
 +      if (i != numSentences) { // last sentence had too few candidates
 +        writeLine("||||||", outFile_sents);
 +        outFile_feats.println("||||||");
 +      }
 +
 +      inFile_nbest.close();
 +      outFile_sents.close();
 +      outFile_feats.close();
 +
 +      if (compressFiles == 1) {
 +        gzipFile(sentsFileName);
 +        gzipFile(featsFileName);
 +      }
 +
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +
 +  }
 +
 +  private void createConfigFile(double[] params, String cfgFileName, String templateFileName) {
 +    try {
 +      // i.e. create cfgFileName, which is similar to templateFileName, but with
 +      // params[] as parameter values
 +
 +      BufferedReader inFile = new BufferedReader(new FileReader(templateFileName));
 +      PrintWriter outFile = new PrintWriter(cfgFileName);
 +
 +      String line = inFile.readLine();
 +
 +      while (line != null) {
 +        int c_match = -1;
 +        for (int c = 1; c <= numParams; ++c) {
 +          if (line.startsWith(paramNames[c] + " ")) {
 +            c_match = c;
 +            break;
 +          }
 +        }
 +
 +        if (c_match == -1) {
 +          outFile.println(line);
 +        } else {
 +          outFile.println(paramNames[c_match] + " " + params[c_match]);
 +        }
 +
 +        line = inFile.readLine();
 +      }
 +
 +      inFile.close();
 +      outFile.close();
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +  }
 +
 +  private void processParamFile() {
 +    // process parameter file
 +    Scanner inFile_init = null;
 +    try {
 +      inFile_init = new Scanner(new FileReader(paramsFileName));
 +    } catch (FileNotFoundException e) {
 +      throw new RuntimeException("FileNotFoundException in MertCore.processParamFile(): " + e.getMessage());
 +    }
 +
 +    String dummy = "";
 +
 +    // initialize lambda[] and other related arrays
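 +    // The loop below expects each parameter line to have the form
 +    //   name ||| default-value Opt|Fix minThValue maxThValue minRandValue maxRandValue
 +    // e.g. (hypothetical name and values):
 +    //   lm ||| 1.000000 Opt -Inf +Inf -1 +1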
 +    for (int c = 1; c <= numParams; ++c) {
 +      // skip parameter name
 +      while (!dummy.equals("|||")) {
 +        dummy = inFile_init.next();
 +      }
 +
 +      // read default value
 +      lambda[c] = inFile_init.nextDouble();
 +      defaultLambda[c] = lambda[c];
 +
 +      // read isOptimizable
 +      dummy = inFile_init.next();
 +      if (dummy.equals("Opt")) {
 +        isOptimizable[c] = true;
 +      } else if (dummy.equals("Fix")) {
 +        isOptimizable[c] = false;
 +      } else {
 +        throw new RuntimeException("Unknown isOptimizable string " + dummy
 +            + " (must be either Opt or Fix)");
 +      }
 +
 +      if (!isOptimizable[c]) { // skip next four values
 +        dummy = inFile_init.next();
 +        dummy = inFile_init.next();
 +        dummy = inFile_init.next();
 +        dummy = inFile_init.next();
 +      } else {
 +        // set minThValue[c] and maxThValue[c] (range for thresholds to investigate)
 +        dummy = inFile_init.next();
 +        if (dummy.equals("-Inf")) {
 +          minThValue[c] = NegInf;
 +        } else if (dummy.equals("+Inf")) {
 +          throw new RuntimeException("minThValue[" + c + "] cannot be +Inf!");
 +        } else {
 +          minThValue[c] = Double.parseDouble(dummy);
 +        }
 +
 +        dummy = inFile_init.next();
 +        if (dummy.equals("-Inf")) {
 +          throw new RuntimeException("maxThValue[" + c + "] cannot be -Inf!");
 +        } else if (dummy.equals("+Inf")) {
 +          maxThValue[c] = PosInf;
 +        } else {
 +          maxThValue[c] = Double.parseDouble(dummy);
 +        }
 +
 +        // set minRandValue[c] and maxRandValue[c] (range for random values)
 +        dummy = inFile_init.next();
 +        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
 +          throw new RuntimeException("minRandValue[" + c + "] cannot be -Inf or +Inf!");
 +        } else {
 +          minRandValue[c] = Double.parseDouble(dummy);
 +        }
 +
 +        dummy = inFile_init.next();
 +        if (dummy.equals("-Inf") || dummy.equals("+Inf")) {
 +          throw new RuntimeException("maxRandValue[" + c + "] cannot be -Inf or +Inf!");
 +        } else {
 +          maxRandValue[c] = Double.parseDouble(dummy);
 +        }
 +
 +
 +        // check for illogical values
 +        if (minThValue[c] > maxThValue[c]) {
 +          throw new RuntimeException("minThValue[" + c + "]=" + minThValue[c]
 +              + " > " + maxThValue[c] + "=maxThValue[" + c + "]!");
 +        }
 +        if (minRandValue[c] > maxRandValue[c]) {
 +          throw new RuntimeException("minRandValue[" + c + "]=" + minRandValue[c]
 +              + " > " + maxRandValue[c] + "=maxRandValue[" + c + "]!");
 +        }
 +
 +        // check for odd values
 +        if (!(minThValue[c] <= lambda[c] && lambda[c] <= maxThValue[c])) {
 +          println("Warning: lambda[" + c + "] has initial value (" + lambda[c] + ")", 1);
 +          println("         that is outside its critical value range " + "[" + minThValue[c] + ","
 +              + maxThValue[c] + "]", 1);
 +        }
 +
 +        if (minThValue[c] == maxThValue[c]) {
 +          println("Warning: lambda[" + c + "] has " + "minThValue = maxThValue = " + minThValue[c]
 +              + ".", 1);
 +        }
 +
 +        if (minRandValue[c] == maxRandValue[c]) {
 +          println("Warning: lambda[" + c + "] has " + "minRandValue = maxRandValue = "
 +              + minRandValue[c] + ".", 1);
 +        }
 +
 +        if (minRandValue[c] < minThValue[c] || minRandValue[c] > maxThValue[c]
 +            || maxRandValue[c] < minThValue[c] || maxRandValue[c] > maxThValue[c]) {
 +          println("Warning: The random value range for lambda[" + c + "] is not contained", 1);
 +          println("         within its critical value range.", 1);
 +        }
 +
 +      } // if (!isOptimizable[c])
 +
 +      /*
 +       * precision[c] = inFile_init.nextDouble(); if (precision[c] < 0) { println("precision[" + c +
 +       * "]=" + precision[c] + " < 0!  Must be non-negative."); System.exit(21); }
 +       */
 +
 +    }
 +
 +    // set normalizationOptions[]
 +    String origLine = "";
 +    while (origLine != null && origLine.length() == 0) {
 +      origLine = inFile_init.nextLine();
 +    }
 +
 +
 +    // How should a lambda[] vector be normalized (before decoding)?
 +    // nO[0] = 0: no normalization
 +    // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1]
 +    // nO[0] = 2: scale so that the maximum absolute value is nO[1]
 +    // nO[0] = 3: scale so that the minimum absolute value is nO[1]
 +    // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2]
 +
 +    // normalization = none
 +    // normalization = absval 1 lm
 +    // normalization = maxabsval 1
 +    // normalization = minabsval 1
 +    // normalization = LNorm 2 1
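 +    // Worked example (hypothetical weights): with "normalization = absval 1 lm" and a current
 +    // lm weight of -2.0, all lambda[] values are scaled by 1/2.0 so that |lambda[lm]| becomes 1.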
 +
 +    dummy = (origLine.substring(origLine.indexOf("=") + 1)).trim();
 +    String[] dummyA = dummy.split("\\s+");
 +
 +    if (dummyA[0].equals("none")) {
 +      normalizationOptions[0] = 0;
 +    } else if (dummyA[0].equals("absval")) {
 +      normalizationOptions[0] = 1;
 +      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
 +      String pName = dummyA[2];
 +      for (int i = 3; i < dummyA.length; ++i) { // in case parameter name has multiple words
 +        pName = pName + " " + dummyA[i];
 +      }
 +      normalizationOptions[2] = c_fromParamName(pName);
 +
 +      if (normalizationOptions[1] <= 0) {
 +        throw new RuntimeException("Value for the absval normalization method must be positive.");
 +      }
 +      if (normalizationOptions[2] == 0) {
 +        throw new RuntimeException("Unrecognized feature name " + normalizationOptions[2]
 +            + " for absval normalization method.");
 +      }
 +    } else if (dummyA[0].equals("maxabsval")) {
 +      normalizationOptions[0] = 2;
 +      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
 +      if (normalizationOptions[1] <= 0) {
 +        throw new RuntimeException("Value for the maxabsval normalization method must be positive.");
 +      }
 +    } else if (dummyA[0].equals("minabsval")) {
 +      normalizationOptions[0] = 3;
 +      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
 +      if (normalizationOptions[1] <= 0) {
 +        throw new RuntimeException("Value for the minabsval normalization method must be positive.");
 +      }
 +    } else if (dummyA[0].equals("LNorm")) {
 +      normalizationOptions[0] = 4;
 +      normalizationOptions[1] = Double.parseDouble(dummyA[1]);
 +      normalizationOptions[2] = Double.parseDouble(dummyA[2]);
 +      if (normalizationOptions[1] <= 0 || normalizationOptions[2] <= 0) {
 +        throw new RuntimeException("Both values for the LNorm normalization method must be positive.");
 +      }
 +    } else {
 +      throw new RuntimeException("Unrecognized normalization method " + dummyA[0] + "; "
 +          + "must be one of none, absval, maxabsval, and LNorm.");
 +    } // if (dummyA[0])
 +
 +    inFile_init.close();
 +  }
 +
 +  private void processDocInfo() {
 +    // sets numDocuments and docOfSentence[]
 +    docOfSentence = new int[numSentences];
 +
 +    if (docInfoFileName == null) {
 +      for (int i = 0; i < numSentences; ++i)
 +        docOfSentence[i] = 0;
 +      numDocuments = 1;
 +    } else {
 +
 +      try {
 +
 +        // 4 possible formats:
 +        // 1) List of numbers, one per document, indicating # sentences in each document.
 +        // 2) List of "docName size" pairs, one per document, indicating name of document and #
 +        // sentences.
 +        // 3) List of docName's, one per sentence, indicating which document each sentence
 +        // belongs to.
 +        // 4) List of docName_number's, one per sentence, indicating which document each
 +        // sentence belongs to, and its order in that document. (can also use '-' instead of '_')
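 +        // Hypothetical examples of one line in each format (names and counts are illustrative):
 +        //   format 1:  "10"          (a document containing 10 sentences)
 +        //   format 2:  "news01 10"   (document news01 contains 10 sentences)
 +        //   format 3:  "news01"      (this sentence belongs to document news01)
 +        //   format 4:  "news01_3"    (this sentence is sentence #3 of document news01)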
 +
-         int docInfoSize = countNonEmptyLines(docInfoFileName);
++        int docInfoSize = new ExistingUTF8EncodedTextFile(docInfoFileName).getNumberOfNonEmptyLines();
 +
 +        if (docInfoSize < numSentences) { // format #1 or #2
 +          numDocuments = docInfoSize;
 +          int i = 0;
 +
 +          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
 +          String line = inFile.readLine();
 +          boolean format1 = (!(line.contains(" ")));
 +
 +          for (int doc = 0; doc < numDocuments; ++doc) {
 +
 +            if (doc != 0) line = inFile.readLine();
 +
 +            int docSize = 0;
 +            if (format1) {
 +              docSize = Integer.parseInt(line);
 +            } else {
 +              docSize = Integer.parseInt(line.split("\\s+")[1]);
 +            }
 +
 +            for (int i2 = 1; i2 <= docSize; ++i2) {
 +              docOfSentence[i] = doc;
 +              ++i;
 +            }
 +
 +          }
 +
 +          // now i == numSentences
 +
 +          inFile.close();
 +
 +        } else if (docInfoSize == numSentences) { // format #3 or #4
 +
 +          boolean format3 = false;
 +
 +          HashSet<String> seenStrings = new HashSet<String>();
 +          BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
 +          for (int i = 0; i < numSentences; ++i) {
 +            // set format3 = true if a duplicate is found
 +            String line = inFile.readLine();
 +            if (seenStrings.contains(line)) format3 = true;
 +            seenStrings.add(line);
 +          }
 +
 +          inFile.close();
 +
 +          HashSet<String> seenDocNames = new HashSet<String>();
 +          HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
 +          // maps a document name to the order (0-indexed) in which it was seen
 +
 +          inFile = new BufferedReader(new FileReader(docInfoFileName));
 +          for (int i = 0; i < numSentences; ++i) {
 +            String line = inFile.readLine();
 +
 +            String docName = "";
 +            if (format3) {
 +              docName = line;
 +            } else {
 +              int sep_i = Math.max(line.lastIndexOf('_'), line.lastIndexOf('-'));
 +              docName = line.substring(0, sep_i);
 +            }
 +
 +            if (!seenDocNames.contains(docName)) {
 +              seenDocNames.add(docName);
 +              docOrder.put(docName, seenDocNames.size() - 1);
 +            }
 +
 +            int docOrder_i = docOrder.get(docName);
 +
 +            docOfSentence[i] = docOrder_i;
 +
 +          }
 +
 +          inFile.close();
 +
 +          numDocuments = seenDocNames.size();
 +
 +        } else { // badly formatted: more docInfo lines than sentences
 +          throw new RuntimeException("Document info file " + docInfoFileName + " has "
 +              + docInfoSize + " non-empty lines, but there are only " + numSentences
 +              + " sentences.");
 +        }
 +
 +      } catch (IOException e) {
 +        throw new RuntimeException(e);
 +      }
 +    }
 +
 +  }
 +
 +  private boolean copyFile(String origFileName, String newFileName) {
 +    try {
 +      File inputFile = new File(origFileName);
 +      File outputFile = new File(newFileName);
 +
 +      InputStream in = new FileInputStream(inputFile);
 +      OutputStream out = new FileOutputStream(outputFile);
 +
 +      byte[] buffer = new byte[1024];
 +      int len;
 +      while ((len = in.read(buffer)) > 0) {
 +        out.write(buffer, 0, len);
 +      }
 +      in.close();
 +      out.close();
 +
 +      /*
 +       * InputStream inStream = new FileInputStream(new File(origFileName)); BufferedReader inFile =
 +       * new BufferedReader(new InputStreamReader(inStream, "utf8"));
-        * 
++       *
 +       * FileOutputStream outStream = new FileOutputStream(newFileName, false); OutputStreamWriter
 +       * outStreamWriter = new OutputStreamWriter(outStream, "utf8"); BufferedWriter outFile = new
 +       * BufferedWriter(outStreamWriter);
-        * 
++       *
 +       * String line; while(inFile.ready()) { line = inFile.readLine(); writeLine(line, outFile); }
-        * 
++       *
 +       * inFile.close(); outFile.close();
 +       */
 +      return true;
 +    } catch (IOException e) {
 +      LOG.error(e.getMessage(), e);
 +      return false;
 +    }
 +  }
 +
 +  private void renameFile(String origFileName, String newFileName) {
 +    if (fileExists(origFileName)) {
 +      deleteFile(newFileName);
 +      File oldFile = new File(origFileName);
 +      File newFile = new File(newFileName);
 +      if (!oldFile.renameTo(newFile)) {
 +        println("Warning: attempt to rename " + origFileName + " to " + newFileName
 +            + " was unsuccessful!", 1);
 +      }
 +    } else {
 +      println("Warning: file " + origFileName + " does not exist! (in MertCore.renameFile)", 1);
 +    }
 +  }
 +
 +  private void deleteFile(String fileName) {
 +    if (fileExists(fileName)) {
 +      File fd = new File(fileName);
 +      if (!fd.delete()) {
 +        println("Warning: attempt to delete " + fileName + " was unsuccessful!", 1);
 +      }
 +    }
 +  }
 +
 +  private void writeLine(String line, BufferedWriter writer) throws IOException {
 +    writer.write(line, 0, line.length());
 +    writer.newLine();
 +    writer.flush();
 +  }
 +
 +  public void finish() {
 +    if (myDecoder != null) {
 +      myDecoder.cleanUp();
 +    }
 +
 +    // create config file with final values
 +    createConfigFile(lambda, decoderConfigFileName + ".ZMERT.final", decoderConfigFileName
 +        + ".ZMERT.orig");
 +
 +    // delete current decoder config file and decoder output
 +    deleteFile(decoderConfigFileName);
 +    deleteFile(decoderOutFileName);
 +
 +    // restore original name for config file (name was changed
 +    // in initialize() so it doesn't get overwritten)
 +    renameFile(decoderConfigFileName + ".ZMERT.orig", decoderConfigFileName);
 +
 +    if (finalLambdaFileName != null) {
 +      try {
 +        PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
 +        for (int c = 1; c <= numParams; ++c) {
 +          outFile_lambdas.println(paramNames[c] + " ||| " + lambda[c]);
 +        }
 +        outFile_lambdas.close();
 +
 +      } catch (IOException e) {
 +        throw new RuntimeException(e);
 +      }
 +    }
 +
 +  }
 +
 +  private String[] cfgFileToArgsArray(String fileName) {
 +    checkFile(fileName);
 +
 +    Vector<String> argsVector = new Vector<String>();
 +
 +    BufferedReader inFile = null;
 +    try {
 +      inFile = new BufferedReader(new FileReader(fileName));
 +      String line, origLine;
 +      do {
 +        line = inFile.readLine();
 +        origLine = line; // for error reporting purposes
 +
 +        if (line != null && line.length() > 0 && line.charAt(0) != '#') {
 +
 +          if (line.indexOf("#") != -1) { // discard comment
 +            line = line.substring(0, line.indexOf("#"));
 +          }
 +
 +          line = line.trim();
 +
 +          // now line should look like "-xxx XXX"
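 +          // e.g. (hypothetical): "-maxIt 10", or "-m BLEU 4 closest" for an option
 +          // that takes extra values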
 +
 +          String[] paramA = line.split("\\s+");
 +
 +          if (paramA.length == 2 && paramA[0].charAt(0) == '-') {
 +            argsVector.add(paramA[0]);
 +            argsVector.add(paramA[1]);
 +          } else if (paramA.length > 2
 +              && (paramA[0].equals("-m") || paramA[0].equals("-docSet") || paramA[0]
 +                  .equals("-damianos"))) {
 +            // -m (metricName), -docSet, and -damianos are allowed to have extra options
 +            for (int opt = 0; opt < paramA.length; ++opt) {
 +              argsVector.add(paramA[opt]);
 +            }
 +          } else {
 +            println("Malformed line in config file:");
 +            println(origLine);
 +            System.exit(70);
 +          }
 +
 +        }
 +      } while (line != null);
 +
 +      inFile.close();
 +    } catch (FileNotFoundException e) {
 +      throw new RuntimeException("Z-MERT configuration file " + fileName + " was not found!", e);
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +
 +    String[] argsArray = new String[argsVector.size()];
 +
 +    for (int i = 0; i < argsVector.size(); ++i) {
 +      argsArray[i] = argsVector.elementAt(i);
 +    }
 +
 +    return argsArray;
 +  }
 +
 +  private void processArgsArray(String[] args) {
 +    processArgsArray(args, true);
 +  }
 +
 +  private void processArgsArray(String[] args, boolean firstTime) {
 +    /* set default values */
 +    // Relevant files
 +    dirPrefix = null;
 +    sourceFileName = null;
 +    refFileName = "reference.txt";
 +    refsPerSen = 1;
 +    textNormMethod = 1;
 +    paramsFileName = "params.txt";
 +    docInfoFileName = null;
 +    finalLambdaFileName = null;
 +    // MERT specs
 +    metricName = "BLEU";
 +    metricName_display = metricName;
 +    metricOptions = new String[2];
 +    metricOptions[0] = "4";
 +    metricOptions[1] = "closest";
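 +    // these defaults presumably correspond to running with "-m BLEU 4 closest"
 +    // (see the -m handling below, which reads one argument per metric option)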
 +    docSubsetInfo = new int[7];
 +    docSubsetInfo[0] = 0;
 +    maxMERTIterations = 20;
 +    prevMERTIterations = 20;
 +    minMERTIterations = 5;
 +    stopMinIts = 3;
 +    stopSigValue = -1;
 +    //
 +    // /* possibly other early stopping criteria here */
 +    //
 +    numOptThreads = 1;
 +    saveInterFiles = 3;
 +    compressFiles = 0;
 +    initsPerIt = 20;
 +    oneModificationPerIteration = false;
 +    randInit = false;
 +    seed = System.currentTimeMillis();
 +    // useDisk = 2;
 +    // Decoder specs
 +    decoderCommandFileName = null;
 +    passIterationToDecoder = false;
 +    decoderOutFileName = "output.nbest";
 +    validDecoderExitValue = 0;
 +    decoderConfigFileName = "dec_cfg.txt";
 +    sizeOfNBest = 100;
 +    fakeFileNameTemplate = null;
 +    fakeFileNamePrefix = null;
 +    fakeFileNameSuffix = null;
 +    // Output specs
 +    verbosity = 1;
 +    decVerbosity = 1;
 +
 +    damianos_method = 0;
 +    damianos_param = 0.0;
 +    damianos_mult = 0.0;
 +
 +    int i = 0;
 +
 +    while (i < args.length) {
 +      String option = args[i];
 +      // Relevant files
 +      if (option.equals("-dir")) {
 +        dirPrefix = args[i + 1];
 +      } else if (option.equals("-s")) {
 +        sourceFileName = args[i + 1];
 +      } else if (option.equals("-r")) {
 +        refFileName = args[i + 1];
 +      } else if (option.equals("-rps")) {
 +        refsPerSen = Integer.parseInt(args[i + 1]);
 +        if (refsPerSen < 1) {
 +          throw new RuntimeException("refsPerSen must be positive.");
 +        }
 +      } else if (option.equals("-txtNrm")) {
 +        textNormMethod = Integer.parseInt(args[i + 1]);
 +        if (textNormMethod < 0 || textNormMethod > 4) {
 +          throw new RuntimeException("textNormMethod should be between 0 and 4");
 +        }
 +      } else if (option.equals("-p")) {
 +        paramsFileName = args[i + 1];
 +      } else if (option.equals("-docInfo")) {
 +        docInfoFileName = args[i + 1];
 +      } else if (option.equals("-fin")) {
 +        finalLambdaFileName = args[i + 1];
 +        // MERT specs
 +      } else if (option.equals("-m")) {
 +        metricName = args[i + 1];
 +        metricName_display = metricName;
 +        if (EvaluationMetric.knownMetricName(metricName)) {
 +          int optionCount = EvaluationMetric.metricOptionCount(metricName);
 +          metricOptions = new String[optionCount];
 +          for (int opt = 0; opt < optionCount; ++opt) {
 +            metricOptions[opt] = args[i + opt + 2];
 +          }
 +          i += optionCount;
 +        } else {
 +          throw new RuntimeException("Unknown metric name " + metricName + ".");
 +        }
 +      } else if (option.equals("-docSet")) {
 +        String method = args[i + 1];
 +
 +        if (method.equals("all")) {
 +          docSubsetInfo[0] = 0;
 +          i += 0;
 +        } else if (method.equals("bottom")) {
 +          String a = args[i + 2];
 +          if (a.endsWith("d")) {
 +            docSubsetInfo[0] = 1;
 +            a = a.substring(0, a.indexOf("d"));
 +          } else {
 +            docSubsetInfo[0] = 2;
 +            a = a.substring(0, a.indexOf("%"));
 +          }
 +          docSubsetInfo[5] = Integer.parseInt(a);
 +          i += 1;
 +        } else if (method.equals("top")) {
 +          String a = args[i + 2];
 +          if (a.endsWith("d")) {
 +            docSubsetInfo[0] = 3;
 +            a = a.substring(0, a.indexOf("d"));
 +          } else {
 +            docSubsetInfo[0] = 4;
 +            a = a.substring(0, a.indexOf("%"));
 +          }
 +          docSubsetInfo[5] = Integer.parseInt(a);
 +          i += 1;
 +        } else if (method.equals("window")) {
 +          String a1 = args[i + 2];
 +          a1 = a1.substring(0, a1.indexOf("d")); // size of window
 +          String a2 = args[i + 4];
 +          if (a2.indexOf("p") > 0) {
 +            docSubsetInfo[0] = 5;
 +            a2 = a2.substring(0, a2.indexOf("p"));
 +          } else {
 +            docSubsetInfo[0] = 6;
 +            a2 = a2.substring(0, a2.indexOf("r"));
 +          }
 +          docSubsetInfo[5] = Integer.parseInt(a1);
 +          docSubsetInfo[6] = Integer.parseInt(a2);
 +          i += 3;
 +        } else {
 +          throw new RuntimeException("Unknown docSet method " + method + ".");
 +        }
 +      } else if (option.equals("-maxIt")) {
 +        maxMERTIterations = Integer.parseInt(args[i + 1]);
 +        if (maxMERTIterations < 1) {
 +          throw new RuntimeException("maxMERTIts must be positive.");
 +        }
 +      } else if (option.equals("-minIt")) {
 +        minMERTIterations = Integer.parseInt(args[i + 1]);
 +        if (minMERTIterations < 1) {
 +          throw new RuntimeException("minMERTIts must be positive.");
 +        }
 +      } else if (option.equals("-prevIt")) {
 +        prevMERTIterations = Integer.parseInt(args[i + 1]);
 +        if (prevMERTIterations < 0) {
 +          throw new RuntimeException("prevMERTIts must be non-negative.");
 +        }
 +      } else if (option.equals("-stopIt")) {
 +        stopMinIts = Integer.parseInt(args[i + 1]);
 +        if (stopMinIts < 1) {
 +          throw new RuntimeException("stopMinIts must be positive.");
 +        }
 +      } else if (option.equals("-stopSig")) {
 +        stopSigValue = Double.parseDouble(args[i + 1]);
 +      }
 +      //
 +      // /* possibly other early stopping criteria here */
 +      //
 +      else if (option.equals("-thrCnt")) {
 +        numOptThreads = Integer.parseInt(args[i + 1]);
 +        if (numOptThreads < 1) {
 +          throw new RuntimeException("threadCount must be positive.");
 +        }
 +      } else if (option.equals("-save")) {
 +        saveInterFiles = Integer.parseInt(args[i + 1]);
 +        if (saveInterFiles < 0 || saveInterFiles > 3) {
 +          throw new RuntimeException("save should be between 0 and 3");
 +        }
 +      } else if (option.equals("-compress")) {
 +        compressFiles = Integer.parseInt(args[i + 1]);
 +        if (compressFiles < 0 || compressFiles > 1) {
 +          throw new RuntimeException("compressFiles should be either 0 or 1");
 +        }
 +      } else if (option.equals("-ipi")) {
 +        initsPerIt = Integer.parseInt(args[i + 1]);
 +        if (initsPerIt < 1) {
 +          throw new RuntimeException("initsPerIt must be positive.");
 +        }
 +      } else if (option.equals("-opi")) {
 +        int opi = Integer.parseInt(args[i + 1]);
 +        if (opi == 1) {
 +          oneModificationPer

<TRUNCATED>