You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:53 UTC

[37/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/lattice/NodeIdentifierComparator.java
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/NodeIdentifierComparator.java b/src/joshua/lattice/NodeIdentifierComparator.java
deleted file mode 100644
index 40e50b8..0000000
--- a/src/joshua/lattice/NodeIdentifierComparator.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.lattice;
-
-import java.io.Serializable;
-import java.util.Comparator;
-
-/**
- * Compares nodes based only on the natural order of their integer identifiers.
- * 
- * @author Lane Schwartz
- */
-public class NodeIdentifierComparator implements Comparator<Node<?>>, Serializable {
-
-  private static final long serialVersionUID = 1L;
-
-  /**
-   * Orders two nodes by ascending identifier, as java.util.Comparator#compare requires.
-   *
-   * @return -1, 0 or 1 when the id of o1 is below, equal to or above the id of o2
-   */
-  public int compare(Node<?> o1, Node<?> o2) {
-    return (o1.id() < o2.id()) ? -1 : ((o1.id() == o2.id()) ? 0 : 1);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/lattice/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/lattice/package.html b/src/joshua/lattice/package.html
deleted file mode 100644
index a479be8..0000000
--- a/src/joshua/lattice/package.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
--->
-
-Provides implementations of lattice and related data structures.
-
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/BLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/BLEU.java b/src/joshua/metrics/BLEU.java
deleted file mode 100644
index 95c6cee..0000000
--- a/src/joshua/metrics/BLEU.java
+++ /dev/null
@@ -1,540 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.logging.Logger;
-
-public class BLEU extends EvaluationMetric {
-  private static final Logger logger = Logger.getLogger(BLEU.class.getName());
-
-  // The maximum n-gram we care about
-  protected int maxGramLength;
-  protected EffectiveLengthMethod effLengthMethod;
-  // 1: closest, 2: shortest, 3: average
-  // protected HashMap[][] maxNgramCounts;
-
-  protected HashMap<String, Integer>[] maxNgramCounts;
-  protected int[][] refWordCount;
-  protected double[] weights;
-
-  public BLEU() {
-    this(4, "closest");
-  }
-
-  public BLEU(String[] BLEU_options) {
-    this(Integer.parseInt(BLEU_options[0]), BLEU_options[1]);
-  }
-
-  public BLEU(int mxGrmLn, String methodStr) {
-    if (mxGrmLn >= 1) {
-      maxGramLength = mxGrmLn;
-    } else {
-      logger.severe("Maximum gram length must be positive");
-      System.exit(1);
-    }
-
-    if (methodStr.equals("closest")) {
-      effLengthMethod = EffectiveLengthMethod.CLOSEST;
-    } else if (methodStr.equals("shortest")) {
-      effLengthMethod = EffectiveLengthMethod.SHORTEST;
-      // } else if (methodStr.equals("average")) {
-      // effLengthMethod = EffectiveLengthMethod.AVERAGE;
-    } else {
-      logger.severe("Unknown effective length method string " + methodStr + ".");
-      // System.out.println("Should be one of closest, shortest, or average.");
-      logger.severe("Should be one of closest or shortest.");
-      System.exit(1);
-    }
-
-    initialize();
-  }
-
-  protected void initialize() {
-    metricName = "BLEU";
-    toBeMinimized = false;
-    suffStatsCount = 2 * maxGramLength + 2;
-    // 2 per gram length for its precision, and 2 for length info
-    set_weightsArray();
-    set_maxNgramCounts();
-  }
-
-  @Override
-  public double bestPossibleScore() {
-    return 1.0;
-  }
-
-  @Override
-  public double worstPossibleScore() {
-    return 0.0;
-  }
-
-  /**
-   * Sets the BLEU weights for each n-gram level to uniform.
-   */
-  protected void set_weightsArray() {
-    weights = new double[1 + maxGramLength];
-    for (int n = 1; n <= maxGramLength; ++n) {
-      weights[n] = 1.0 / maxGramLength;
-    }
-  }
-
-  /**
-   * Builds, for every sentence, the largest count each n-gram reaches in any single
-   * reference translation (stored in <code>maxNgramCounts</code> and used for clipping
-   * candidate n-gram counts), then caches every reference's word count in
-   * <code>refWordCount</code> for use by effLength.
-   */
-  protected void set_maxNgramCounts() {
-    @SuppressWarnings("unchecked")
-    HashMap<String, Integer>[] perSentence = new HashMap[numSentences];
-    maxNgramCounts = perSentence;
-
-    for (int sen = 0; sen < numSentences; ++sen) {
-      // start from the counts of the first reference translation...
-      maxNgramCounts[sen] = getNgramCountsAll(refSentences[sen][0]);
-
-      // ...then raise each entry to the largest count found in the other references
-      for (int ref = 1; ref < refsPerSen; ++ref) {
-        HashMap<String, Integer> other = getNgramCountsAll(refSentences[sen][ref]);
-        for (Map.Entry<String, Integer> entry : other.entrySet()) {
-          String ngram = entry.getKey();
-          int count = entry.getValue();
-
-          // unseen n-grams are added; known ones are only ever increased
-          if (!maxNgramCounts[sen].containsKey(ngram) || count > maxNgramCounts[sen].get(ngram)) {
-            maxNgramCounts[sen].put(ngram, count);
-          }
-        }
-      }
-    }
-
-    // For efficiency, calculate the reference lengths once; effLength reads them later.
-    refWordCount = new int[numSentences][refsPerSen];
-    for (int sen = 0; sen < numSentences; ++sen) {
-      for (int ref = 0; ref < refsPerSen; ++ref) {
-        refWordCount[sen][ref] = wordCount(refSentences[sen][ref]);
-      }
-    }
-  }
-
-  /**
-   * Computes the BLEU sufficient statistics on a hypothesis.
-   *
-   * @param cand_str the candidate translation, tokens separated by whitespace
-   * @param i index of the sentence the candidate translates
-   * @return an array of length suffStatsCount: the clipped-match and total counts written by
-   *         set_prec_suffStats, followed by the candidate length and the effective reference
-   *         length in the last two positions
-   */
-  public int[] suffStats(String cand_str, int i) {
-    int[] result = new int[suffStatsCount];
-
-    // The empty candidate has no tokens at all; splitting "" would produce one empty token.
-    String[] tokens = cand_str.equals("") ? new String[0] : cand_str.split("\\s+");
-
-    set_prec_suffStats(result, tokens, i);
-    result[suffStatsCount - 2] = tokens.length;
-    result[suffStatsCount - 1] = effLength(tokens.length, i);
-
-    return result;
-  }
-
-  /**
-   * Computes the precision sufficient statistics, clipping counts.
-   * For each n-gram order n, <code>stats[2 * (n - 1)]</code> receives the number of candidate
-   * n-grams matched in the references (each n-gram's count clipped to its entry in
-   * <code>maxNgramCounts[i]</code>) and <code>stats[2 * (n - 1) + 1]</code> receives the total
-   * number of n-grams in the candidate.
-   * 
-   * @param stats array to fill; the slots described above are overwritten
-   * @param words the candidate's tokens
-   * @param i index of the sentence whose reference counts are used for clipping
-   */
-  public void set_prec_suffStats(int[] stats, String[] words, int i) {
-    HashMap<String, Integer>[] candCounts = getNgramCountsArray(words);
-
-    for (int order = 1; order <= maxGramLength; ++order) {
-      int matched = 0;
-
-      // for each n-gram type in the candidate
-      for (Map.Entry<String, Integer> cand : candCounts[order].entrySet()) {
-        int candCount = cand.getValue();
-
-        // an n-gram absent from every reference is clipped to zero
-        int refLimit = 0;
-        if (maxNgramCounts[i].containsKey(cand.getKey())) {
-          refLimit = maxNgramCounts[i].get(cand.getKey());
-        }
-
-        matched += Math.min(candCount, refLimit);
-      }
-
-      int slot = 2 * (order - 1);
-      stats[slot] = matched;
-      stats[slot + 1] = Math.max(words.length - (order - 1), 0); // total gram count
-    }
-  }
-
-  /**
-   * Picks the reference length against which a candidate of the given length is compared.
-   *
-   * @param candLength number of words in the candidate
-   * @param i index of the sentence
-   * @return for CLOSEST, the reference length nearest to candLength (the shorter one on a
-   *         tie); for SHORTEST, the smallest reference length; otherwise candLength itself
-   */
-  public int effLength(int candLength, int i) {
-    if (effLengthMethod == EffectiveLengthMethod.CLOSEST) {
-      int best = refWordCount[i][0];
-      int bestGap = Math.abs(candLength - best);
-
-      for (int ref = 1; ref < refsPerSen; ++ref) {
-        int length = refWordCount[i][ref];
-        int gap = Math.abs(candLength - length);
-
-        // take a strictly closer reference, or an equally close but shorter one
-        if (gap < bestGap || (gap == bestGap && length < best)) {
-          best = length;
-          bestGap = gap;
-        }
-      }
-
-      return best;
-    }
-
-    if (effLengthMethod == EffectiveLengthMethod.SHORTEST) {
-      int shortest = refWordCount[i][0];
-
-      for (int ref = 1; ref < refsPerSen; ++ref) {
-        shortest = Math.min(shortest, refWordCount[i][ref]);
-      }
-
-      return shortest;
-    }
-
-    // An averaging method is not provided: the mean reference length is generally not an
-    // integer, and the sufficient statistics are ints.
-    return candLength; // should never get here anyway
-  }
-
-  /**
-   * Turns BLEU sufficient statistics into a score: the brevity penalty times the exponential
-   * of the weighted sum of log n-gram precisions.
-   *
-   * @param stats for each order n, the matched count at 2 * (n - 1) and the total count at
-   *        2 * (n - 1) + 1, followed by the candidate length and the reference length; a
-   *        wrong array length is logged and ends the JVM with exit status 2
-   * @return the BLEU score
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      logger.severe("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. "
-          + suffStatsCount + ") in BLEU.score(int[])");
-      System.exit(2);
-    }
-
-    final double candLength = stats[suffStatsCount - 2];
-    final double refLength = stats[suffStatsCount - 1];
-
-    double logPrecisionSum = 0.0;
-    double smoothing = 1.0; // following bleu-1.04.pl
-
-    for (int n = 1; n <= maxGramLength; ++n) {
-      final double matched = stats[2 * (n - 1)];
-      final double total = stats[2 * (n - 1) + 1];
-
-      // a candidate with no n-grams of this order counts as precision 1
-      double precision = (total > 0) ? matched / total : 1;
-
-      if (precision == 0) {
-        // no match at all: substitute a value that halves with every zero precision seen
-        smoothing *= 0.5;
-        precision = smoothing / (candLength - n + 1);
-      }
-
-      logPrecisionSum += weights[n] * Math.log(precision);
-    }
-
-    // the penalty applies only when the candidate is shorter than the reference
-    final double brevityPenalty =
-        (candLength < refLength) ? Math.exp(1 - (refLength / candLength)) : 1.0;
-
-    return brevityPenalty * Math.exp(logPrecisionSum);
-  }
-
-  /**
-   * Prints to standard output the components of the BLEU score for the given sufficient
-   * statistics: each n-gram precision, the combined precision, the brevity penalty and the
-   * final score. The arithmetic repeats what score(int[]) does.
-   *
-   * @param stats sufficient statistics, read with the same layout as in score(int[])
-   * @param oneLiner if true, everything is printed compactly on a single line; otherwise each
-   *        component is printed on its own line
-   */
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    double BLEUsum = 0.0;
-    double smooth_addition = 1.0; // following bleu-1.04.pl
-    double c_len = stats[suffStatsCount - 2];
-    double r_len = stats[suffStatsCount - 1];
-
-    double correctGramCount, totalGramCount;
-
-    if (oneLiner) {
-      System.out.print("Precisions: ");
-    }
-
-    for (int n = 1; n <= maxGramLength; ++n) {
-      correctGramCount = stats[2 * (n - 1)];
-      totalGramCount = stats[2 * (n - 1) + 1];
-
-      double prec_n;
-      if (totalGramCount > 0) {
-        prec_n = correctGramCount / totalGramCount;
-      } else {
-        prec_n = 1; // following bleu-1.04.pl ???????
-      }
-
-      if (prec_n > 0) {
-        // either a genuine non-zero precision, or the placeholder 1 used when the candidate
-        // has no n-grams of this order (reported as N/A)
-        if (totalGramCount > 0) {
-          if (oneLiner) {
-            System.out.print(n + "=" + f4.format(prec_n) + ", ");
-          } else {
-            System.out.println("BLEU_precision(" + n + ") = " + (int) correctGramCount + " / "
-                + (int) totalGramCount + " = " + f4.format(prec_n));
-          }
-        } else {
-          if (oneLiner) {
-            System.out.print(n + "=N/A, ");
-          } else {
-            System.out
-                .println("BLEU_precision(" + n + ") = N/A (candidate has no " + n + "-grams)");
-          }
-        }
-      } else {
-        // zero precision: replaced by a smoothed value, marked with "~" or "==smoothed==>"
-        smooth_addition *= 0.5;
-        prec_n = smooth_addition / (c_len - n + 1);
-        // isn't c_len-n+1 just totalGramCount ???????
-
-        if (oneLiner) {
-          System.out.print(n + "~" + f4.format(prec_n) + ", ");
-        } else {
-          System.out.println("BLEU_precision(" + n + ") = " + (int) correctGramCount + " / "
-              + (int) totalGramCount + " ==smoothed==> " + f4.format(prec_n));
-        }
-      }
-
-      BLEUsum += weights[n] * Math.log(prec_n);
-
-    }
-
-    if (oneLiner) {
-      System.out.print("(overall=" + f4.format(Math.exp(BLEUsum)) + "), ");
-    } else {
-      System.out.println("BLEU_precision = " + f4.format(Math.exp(BLEUsum)));
-      System.out.println("");
-    }
-
-    double BP = 1.0;
-    if (c_len < r_len)
-      BP = Math.exp(1 - (r_len / c_len));
-    // if c_len >= r_len, no penalty applies
-
-    if (oneLiner) {
-      System.out.print("BP=" + f4.format(BP) + ", ");
-    } else {
-      System.out.println("Length of candidate corpus = " + (int) c_len);
-      System.out.println("Effective length of reference corpus = " + (int) r_len);
-      System.out.println("BLEU_BP = " + f4.format(BP));
-      System.out.println("");
-    }
-
-    System.out.println("  => BLEU = " + f4.format(BP * Math.exp(BLEUsum)));
-  }
-
-  /**
-   * Counts the whitespace-separated tokens of a sentence.
-   *
-   * @param cand_str the sentence
-   * @return 0 for the empty string, otherwise the number of pieces produced by splitting on
-   *         runs of whitespace
-   */
-  protected int wordCount(String cand_str) {
-    return cand_str.equals("") ? 0 : cand_str.split("\\s+").length;
-  }
-
-  public HashMap<String, Integer>[] getNgramCountsArray(String cand_str) {
-    if (!cand_str.equals("")) {
-      return getNgramCountsArray(cand_str.split("\\s+"));
-    } else {
-      return getNgramCountsArray(new String[0]);
-    }
-  }
-
-  /**
-   * Counts the n-grams of a tokenized sentence, keeping one map per n-gram order.
-   * An n-gram is keyed by its words joined with single spaces.
-   *
-   * @param words the tokens of the sentence
-   * @return an array of length 1 + maxGramLength whose element n (1 &lt;= n &lt;=
-   *         maxGramLength) maps each n-gram of order n to its number of occurrences;
-   *         element 0 is null
-   */
-  public HashMap<String, Integer>[] getNgramCountsArray(String[] words) {
-    @SuppressWarnings("unchecked")
-    HashMap<String, Integer>[] ngramCountsArray = new HashMap[1 + maxGramLength];
-    ngramCountsArray[0] = null;
-    for (int n = 1; n <= maxGramLength; ++n) {
-      ngramCountsArray[n] = new HashMap<String, Integer>();
-    }
-
-    int len = words.length;
-
-    for (int st = 0; st < len; ++st) {
-      // The longest n-gram starting at st is bounded by maxGramLength and, near the end of
-      // the sentence, by the number of words that remain. A single loop therefore covers
-      // both full-length windows and the shorter windows at the tail.
-      int longest = Math.min(maxGramLength, len - st);
-
-      String gram = null;
-      for (int n = 1; n <= longest; ++n) {
-        // extend the previous (n-1)-gram by one word
-        gram = (n == 1) ? words[st] : gram + " " + words[st + n - 1];
-
-        Integer oldCount = ngramCountsArray[n].get(gram);
-        ngramCountsArray[n].put(gram, (oldCount == null) ? 1 : oldCount + 1);
-      }
-    }
-
-    return ngramCountsArray;
-  }
-
-  public HashMap<String, Integer> getNgramCountsAll(String cand_str) {
-    if (!cand_str.equals("")) {
-      return getNgramCountsAll(cand_str.split("\\s+"));
-    } else {
-      return getNgramCountsAll(new String[0]);
-    }
-  }
-
-  /**
-   * Counts the n-grams of every order from 1 to maxGramLength in a tokenized sentence,
-   * collecting them in one map. An n-gram is keyed by its words joined with single spaces.
-   *
-   * @param words the tokens of the sentence
-   * @return a map from n-gram to its number of occurrences in the sentence
-   */
-  public HashMap<String, Integer> getNgramCountsAll(String[] words) {
-    HashMap<String, Integer> ngramCountsAll = new HashMap<String, Integer>();
-
-    int len = words.length;
-
-    for (int st = 0; st < len; ++st) {
-      // The longest n-gram starting at st is bounded by maxGramLength and, near the end of
-      // the sentence, by the number of words that remain. A single loop therefore covers
-      // both full-length windows and the shorter windows at the tail.
-      int longest = Math.min(maxGramLength, len - st);
-
-      String gram = null;
-      for (int n = 1; n <= longest; ++n) {
-        // extend the previous (n-1)-gram by one word
-        gram = (n == 1) ? words[st] : gram + " " + words[st + n - 1];
-
-        Integer oldCount = ngramCountsAll.get(gram);
-        ngramCountsAll.put(gram, (oldCount == null) ? 1 : oldCount + 1);
-      }
-    }
-
-    return ngramCountsAll;
-  }
-
-  enum EffectiveLengthMethod {
-    CLOSEST, SHORTEST, AVERAGE
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/BLEU_SBP.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/BLEU_SBP.java b/src/joshua/metrics/BLEU_SBP.java
deleted file mode 100644
index e58256b..0000000
--- a/src/joshua/metrics/BLEU_SBP.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-/**
- * A variant of BLEU whose only difference is the candidate-length statistic: it is clipped
- * so that it never exceeds the effective reference length.
- */
-public class BLEU_SBP extends BLEU {
-  // constructors
-  public BLEU_SBP() {
-    super();
-  }
-
-  public BLEU_SBP(String[] BLEU_SBP_options) {
-    super(BLEU_SBP_options);
-  }
-
-  public BLEU_SBP(int mxGrmLn, String methodStr) {
-    super(mxGrmLn, methodStr);
-  }
-
-  /**
-   * Computes the BLEU_SBP sufficient statistics on a hypothesis. The layout is the one BLEU
-   * uses and BLEU.score(int[]) reads: the n-gram statistics written by set_prec_suffStats,
-   * then the candidate length and the effective reference length in the last two positions.
-   *
-   * @param cand_str the candidate translation, tokens separated by whitespace
-   * @param i index of the sentence the candidate translates
-   * @return an array of length suffStatsCount
-   */
-  @Override
-  public int[] suffStats(String cand_str, int i) {
-    int[] stats = new int[suffStatsCount];
-
-    // As in BLEU.suffStats, the empty candidate has no words; splitting "" would yield one
-    // empty token and count it as a word.
-    String[] words = cand_str.equals("") ? new String[0] : cand_str.split("\\s+");
-
-    set_prec_suffStats(stats, words, i);
-
-    // The only place where BLEU_SBP differs from BLEU: the candidate length is capped at the
-    // effective reference length. The lengths belong in the last two slots; the slots
-    // maxGramLength + 1 and maxGramLength + 2 hold n-gram statistics whenever
-    // maxGramLength > 1 and must not be overwritten.
-    int effectiveLength = effLength(words.length, i);
-    stats[suffStatsCount - 2] = Math.min(words.length, effectiveLength);
-    stats[suffStatsCount - 1] = effectiveLength;
-
-    return stats;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/EvaluationMetric.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/EvaluationMetric.java b/src/joshua/metrics/EvaluationMetric.java
deleted file mode 100644
index 4dd9fbd..0000000
--- a/src/joshua/metrics/EvaluationMetric.java
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.text.DecimalFormat;
-import java.util.Arrays;
-import java.util.TreeMap;
-
-public abstract class EvaluationMetric {
-  /* static data members */
-  private static TreeMap<String, Integer> metricOptionCount; // maps metric names -> number of
-                                                             // options for that metric
-  protected static int numSentences; // number of sentences in the MERT set
-  protected static int numDocuments; // number of documents in the MERT set
-  protected static int refsPerSen;
-  protected static String[][] refSentences;
-  protected final static DecimalFormat f0 = new DecimalFormat("###0");
-  protected final static DecimalFormat f4 = new DecimalFormat("###0.0000");
-  protected static String tmpDirPrefix;
-
-  /* non-static data members */
-  protected int suffStatsCount; // number of sufficient statistics
-  protected String metricName; // name of metric
-  protected boolean toBeMinimized;
-
-  // is this a metric that should be minimized?
-  // e.g. toBeMinimized = true for 01LOSS, WER, TER
-  // toBeMinimized = false for BLEU
-
-  /* static (=> also non-abstract) methods */
-  /**
-   * Rebuilds the table of known metric names, mapping each name to the length of the
-   * options array that metric expects. knownMetricName and metricOptionCount read this table.
-   */
-  public static void set_knownMetrics() {
-    metricOptionCount = new TreeMap<String, Integer>();
-
-    metricOptionCount.put("BLEU", 2);
-    // the "BLEU" metric expects an options array of length 2
-    metricOptionCount.put("BLEU_SBP", 2);
-    // the "BLEU_SBP" metric expects an options array of length 2
-    metricOptionCount.put("01LOSS", 0);
-    // the "01LOSS" metric expects an options array of length 0
-    metricOptionCount.put("TER", 6);
-    // the "TER" metric expects an options array of length 6
-    // metricOptionCount.put("METEOR",4);
-    // the "METEOR" metric expects an options array of length 4
-    // metricOptionCount.put("RYPT",5);
-    // the "RYPT" metric expects an options array of length 5
-    metricOptionCount.put("TER-BLEU", 8);
-    // the "TER-BLEU" metric expects an options array of length 8
-    // metricOptionCount.put("WER",0);
-    // the "WER" metric expects an options array of length 0
-    metricOptionCount.put("MC_BLEU", 4);
-    metricOptionCount.put("PRECIS", 6);
-    metricOptionCount.put("SRC_BLEU", 4);
-    metricOptionCount.put("PRECIS-SRC_BLEU", 6);
-    metricOptionCount.put("GL_BLEU", 3);
-  }
-
-  /**
-   * Creates the metric object registered under the given name.
-   *
-   * @param metricName name of the metric, e.g. "BLEU" or "TER"
-   * @param metricOptions options array handed to the metric's constructor
-   * @return the new metric, or null if metricName matches none of the names handled here
-   */
-  public static EvaluationMetric getMetric(String metricName, String[] metricOptions) {
-    EvaluationMetric retMetric = null;
-
-    if (metricName.equals("BLEU")) {
-      retMetric = new BLEU(metricOptions); // the "BLEU" metric corresponds to the BLEU class
-    } else if (metricName.equals("BLEU_SBP")) {
-      retMetric = new BLEU_SBP(metricOptions); // the "BLEU_SBP" metric corresponds to the BLEU_SBP
-                                               // class
-    } else if (metricName.equals("01LOSS")) {
-      retMetric = new ZeroOneLoss(metricOptions); // the "01LOSS" metric corresponds to the
-                                                  // ZeroOneLoss class
-    } else if (metricName.equals("TER")) {
-      retMetric = new TER(metricOptions); // the "TER" metric corresponds to the TER class
-      // } else if (metricName.equals("METEOR")) {
-      // retMetric = new METEOR(metricOptions); // the "METEOR" metric corresponds to the METEOR
-      // class
-      // } else if (metricName.equals("RYPT")) {
-      // retMetric = new RYPT(metricOptions); // the "RYPT" metric corresponds to the RYPT class
-    } else if (metricName.equals("TER-BLEU")) {
-      retMetric = new TERMinusBLEU(metricOptions); // the "TER-BLEU" metric corresponds to the
-                                                   // TERMinusBLEU class
-      // } else if (metricName.equals("WER")) {
-      // retMetric = new WordErrorRate(metricOptions); // the "WER" metric corresponds to the
-      // WordErrorRate class
-    } else if (metricName.equals("MC_BLEU")) {
-      retMetric = new MinimumChangeBLEU(metricOptions); // the "MC_BLEU" metric corresponds to the
-                                                        // MinimumChangeBLEU class
-    } else if (metricName.equals("PRECIS")) {
-      retMetric = new Precis(metricOptions);
-    } else if (metricName.equals("SRC_BLEU")) {
-      retMetric = new SourceBLEU(metricOptions);
-    } else if (metricName.equals("PRECIS-SRC_BLEU")) {
-      retMetric = new PrecisMinusSourceBLEU(metricOptions);
-    } else if (metricName.equals("GL_BLEU")) {
-      retMetric = new GradeLevelBLEU(metricOptions); // the "GL_BLEU" metric corresponds to the
-                                                     // GradeLevelBLEU class
-    }
-    return retMetric;
-  }
-
-  public static void set_numSentences(int x) {
-    numSentences = x;
-  }
-
-  public static void set_numDocuments(int x) {
-    numDocuments = x;
-  }
-
-  public static void set_refsPerSen(int x) {
-    refsPerSen = x;
-  }
-
-  public static void set_tmpDirPrefix(String S) {
-    tmpDirPrefix = S;
-  }
-
-  public static void set_refSentences(String[][] refs) {
-    refSentences = new String[numSentences][refsPerSen];
-    for (int i = 0; i < numSentences; ++i) {
-      for (int r = 0; r < refsPerSen; ++r) {
-        refSentences[i][r] = refs[i][r];
-      }
-    }
-  }
-
-  public static boolean knownMetricName(String name) {
-    return metricOptionCount.containsKey(name);
-  }
-
-  public static int metricOptionCount(String name) {
-    return metricOptionCount.get(name);
-  }
-
-  /* non-abstract, non-static methods */
-  public int get_suffStatsCount() {
-    return suffStatsCount;
-  }
-
-  public String get_metricName() {
-    return metricName;
-  }
-
-  public boolean getToBeMinimized() {
-    return toBeMinimized;
-  }
-
-  /**
-   * Tells whether score x is better than score y for this metric.
-   *
-   * @return {@code x < y} when the metric is to be minimized, {@code x > y} otherwise
-   */
-  public boolean isBetter(double x, double y) {
-    return toBeMinimized ? (x < y) : (x > y);
-  }
-
-  /**
-   * Scores a single candidate against sentence i.
-   *
-   * @param cand_str the candidate translation
-   * @param i index of the sentence the candidate translates
-   * @return the score of the candidate's sufficient statistics
-   */
-  public double score(String cand_str, int i) {
-    // go through the batch interface with a batch of exactly one candidate
-    int[][] batchStats = suffStats(new String[] {cand_str}, new int[] {i});
-
-    int[] stats = new int[suffStatsCount];
-    for (int k = 0; k < suffStatsCount; ++k) {
-      stats[k] = batchStats[0][k];
-    }
-
-    return score(stats);
-  }
-
-  public double score(String[] topCand_str) {
-    int[] stats = suffStats(topCand_str);
-    return score(stats);
-  }
-
-  /**
-   * Sums the sufficient statistics of one candidate per sentence, where the candidate at
-   * position k is evaluated against sentence k.
-   *
-   * @param topCand_str one candidate for each of the numSentences sentences
-   * @return an array of length suffStatsCount holding the totals over all sentences
-   */
-  public int[] suffStats(String[] topCand_str) {
-    // candidate k belongs to sentence k
-    int[] sentenceIds = new int[numSentences];
-    for (int k = 0; k < numSentences; ++k) {
-      sentenceIds[k] = k;
-    }
-
-    int[][] perSentence = suffStats(topCand_str, sentenceIds);
-
-    int[] totals = new int[suffStatsCount]; // starts out as all zeros
-    for (int k = 0; k < numSentences; ++k) {
-      for (int s = 0; s < suffStatsCount; ++s) {
-        totals[s] += perSentence[k][s];
-      }
-    }
-
-    return totals;
-  }
-
-  /**
-   * Calculates sufficient statistics on each sentence in the corpus, returning them as arrays.
-   * 
-   * @param cand_strings
-   * @param cand_indices
-   * @return
-   */
-  public int[][] suffStats(String[] cand_strings, int[] cand_indices) {
-
-    int candCount = cand_strings.length;
-    if (cand_indices.length != candCount) {
-      System.out.println("Array lengths mismatch in suffStats(String[],int[]); returning null.");
-      return null;
-    }
-
-    int[][] stats = new int[candCount][suffStatsCount];
-
-    for (int d = 0; d < candCount; ++d) {
-      int[] currStats = suffStats(cand_strings[d], cand_indices[d]);
-
-      for (int s = 0; s < suffStatsCount; ++s) {
-        stats[d][s] = currStats[s];
-      }
-    } // for (d)
-
-    return stats;
-  }
-
-  /**
-   * Reads candidates and their sentence indices from two parallel files (line k of one file
-   * goes with line k of the other), computes the sufficient statistics of every candidate in
-   * batches of at most maxBatchSize, and writes one line of space-separated statistics per
-   * candidate to the output file. Similar to suffStats(String[], int[]), but file based.
-   * An IOException is reported on standard error and ends the JVM.
-   *
-   * @param cand_strings_fileName file with one candidate per line, read as UTF-8
-   * @param cand_indices_fileName file with one integer sentence index per line
-   * @param outputFileName file to write the statistics to
-   * @param maxBatchSize largest number of candidates processed at once
-   */
-  public void createSuffStatsFile(String cand_strings_fileName, String cand_indices_fileName,
-      String outputFileName, int maxBatchSize) {
-    BufferedReader inFile_cands = null;
-    BufferedReader inFile_indices = null;
-    PrintWriter outFile = null;
-
-    try {
-      inFile_cands = new BufferedReader(
-          new InputStreamReader(new FileInputStream(cand_strings_fileName), "utf8"));
-      inFile_indices = new BufferedReader(
-          new InputStreamReader(new FileInputStream(cand_indices_fileName), "utf8"));
-      outFile = new PrintWriter(outputFileName);
-
-      String[] cand_strings = new String[maxBatchSize];
-      int[] cand_indices = new int[maxBatchSize];
-
-      String line_cand = inFile_cands.readLine();
-      String line_index = inFile_indices.readLine();
-
-      while (line_cand != null) {
-        // fill the next batch; line_cand/line_index always hold the next unprocessed pair
-        int size = 0;
-        while (line_cand != null) {
-          cand_strings[size] = line_cand;
-          // NOTE: if the index file is shorter than the candidate file, line_index is null
-          // here and parseInt throws NumberFormatException
-          cand_indices[size] = Integer.parseInt(line_index);
-          ++size; // now size is how many were read for this current batch
-          if (size == maxBatchSize) {
-            break;
-          }
-
-          line_cand = inFile_cands.readLine();
-          line_index = inFile_indices.readLine();
-        }
-
-        if (size < maxBatchSize) { // last batch, and smaller than maxBatchSize
-          cand_strings = Arrays.copyOf(cand_strings, size);
-          cand_indices = Arrays.copyOf(cand_indices, size);
-        }
-
-        int[][] SS = suffStats(cand_strings, cand_indices);
-        for (int d = 0; d < size; ++d) {
-          StringBuilder stats_str = new StringBuilder();
-
-          for (int s = 0; s < suffStatsCount - 1; ++s) {
-            stats_str.append(SS[d][s]).append(" ");
-          }
-          stats_str.append(SS[d][suffStatsCount - 1]);
-
-          outFile.println(stats_str);
-        }
-
-        line_cand = inFile_cands.readLine();
-        line_index = inFile_indices.readLine();
-      }
-
-      inFile_cands.close();
-      inFile_indices.close();
-      outFile.close();
-
-    } catch (IOException e) {
-      System.err.println("IOException in EvaluationMetric.createSuffStatsFile(...): "
-          + e.getMessage());
-      System.exit(99902);
-    } finally {
-      // Something is still open here only when an unchecked exception (for example a
-      // NumberFormatException from a malformed index line) escapes the code above; release
-      // the files so they are not leaked. Closing an already closed stream has no effect.
-      if (outFile != null) {
-        outFile.close();
-      }
-      try {
-        if (inFile_indices != null) {
-          inFile_indices.close();
-        }
-      } catch (IOException ignored) {
-        // nothing useful can be done; the original exception is the one that matters
-      }
-      try {
-        if (inFile_cands != null) {
-          inFile_cands.close();
-        }
-      } catch (IOException ignored) {
-        // nothing useful can be done; the original exception is the one that matters
-      }
-    }
-
-  }
-
-  /**
-   * Prints a detailed score for one candidate per sentence by first reducing the candidates to
-   * their summed sufficient statistics.
-   *
-   * @param topCand_str candidate translations, one per sentence
-   * @param oneLiner passed through unchanged to printDetailedScore_fromStats(int[], boolean)
-   */
-  public void printDetailedScore(String[] topCand_str, boolean oneLiner) {
-    printDetailedScore_fromStats(suffStats(topCand_str), oneLiner);
-  }
-
-  /**
-   * Returns the average of the individual document scores (the document-level score, as opposed
-   * to the corpus-level score).
-   *
-   * @param stats sufficient statistics indexed [doc][s]
-   * @return the sum of score(stats[doc]) over the first numDocuments rows, divided by numDocuments
-   */
-  public double score(int[][] stats) {
-    double total = 0.0;
-    int doc = 0;
-    while (doc < numDocuments) {
-      total += score(stats[doc]);
-      ++doc;
-    }
-    return total / numDocuments;
-  }
-
-  /**
-   * Returns the average of the document scores restricted to the documents ranked firstRank
-   * through lastRank, inclusive. Ranks are 1-indexed (rank 1 is the best document) even though
-   * documents are 0-indexed.
-   *
-   * @param stats sufficient statistics indexed [doc][s]
-   * @param firstRank best rank to include (1-based)
-   * @param lastRank worst rank to include (1-based)
-   * @return the mean score of the selected documents
-   */
-  public double score(int[][] stats, int firstRank, int lastRank) {
-    double[] ascending = docScores(stats);
-    Arrays.sort(ascending);
-
-    double sum = 0.0;
-
-    if (!toBeMinimized) {
-      // larger is better: rank 1 sits at the end of the sorted array,
-      // i.e. rank r is ascending[numDocuments - r]; walk from the best rank downwards
-      final int stop = numDocuments - lastRank;
-      int j = numDocuments - firstRank;
-      while (j >= stop) {
-        sum += ascending[j];
-        --j;
-      }
-    } else {
-      // smaller is better: rank 1 sits at the front, i.e. rank r is ascending[r - 1]
-      int j = firstRank - 1;
-      while (j < lastRank) {
-        sum += ascending[j];
-        ++j;
-      }
-    }
-
-    return sum / (lastRank - firstRank + 1);
-
-  }
-
-  /**
-   * Scores every document separately.
-   *
-   * @param stats sufficient statistics indexed [doc][s]
-   * @return an array of length numDocuments whose entry doc is score(stats[doc])
-   */
-  public double[] docScores(int[][] stats) {
-    double[] perDocument = new double[numDocuments];
-    int doc = 0;
-    while (doc < numDocuments) {
-      perDocument[doc] = score(stats[doc]);
-      ++doc;
-    }
-    return perDocument;
-  }
-
-  public void printDetailedScore_fromStats(int[][] stats, String[] docNames) {
-    // prints individual document scores
-    // stats[][] is indexed [doc][s]
-
-    for (int doc = 0; doc < numDocuments; ++doc) {
-      if (docNames == null) {
-        System.out.print("Document #" + doc + ": ");
-      } else {
-        System.out.print(docNames[doc] + ": ");
-      }
-      printDetailedScore_fromStats(stats[doc], true);
-    }
-  }
-
-  /* abstract (=> also non-static) methods */
-
-  /**
-   * Sets the data members of the concrete metric. The implementations visible alongside this
-   * class assign metricName, toBeMinimized and suffStatsCount here.
-   */
-  protected abstract void initialize();
-
-  /** Returns the best score this metric can produce. */
-  public abstract double bestPossibleScore();
-
-  /** Returns the worst score this metric can produce. */
-  public abstract double worstPossibleScore();
-
-  /**
-   * Computes the sufficient statistics of a single candidate.
-   *
-   * @param cand_str the candidate translation
-   * @param i index of the sentence the candidate belongs to
-   * @return the statistics; suffStats(String[], int[]) reads suffStatsCount entries from it
-   */
-  public abstract int[] suffStats(String cand_str, int i);
-
-  /**
-   * Turns one array of sufficient statistics into a score.
-   *
-   * @param stats sufficient statistics, indexed [s]
-   * @return the metric score for those statistics
-   */
-  public abstract double score(int[] stats);
-
-  /**
-   * Prints a description of the score derived from the given statistics.
-   *
-   * @param stats sufficient statistics, indexed [s]
-   * @param oneLiner presumably selects a single-line format over a multi-line one, as the
-   *        implementations alongside this class do -- confirm per subclass
-   */
-  public abstract void printDetailedScore_fromStats(int[] stats, boolean oneLiner);
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/GradeLevelBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/GradeLevelBLEU.java b/src/joshua/metrics/GradeLevelBLEU.java
deleted file mode 100644
index 06efa8b..0000000
--- a/src/joshua/metrics/GradeLevelBLEU.java
+++ /dev/null
@@ -1,278 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-
-/**
- * BLEU variant for text simplification: the BLEU score against the references is combined with
- * a BLEU score against the source sentence ("BLEU+") and multiplied by a readability penalty
- * derived from the Flesch-Kincaid grade level of the candidate.
- * <p>
- * Layout of the sufficient statistics (length 4 * maxGramLength + 7):
- * <ul>
- * <li>[0, 2 * maxGramLength): n-gram precision statistics against the references</li>
- * <li>[2 * maxGramLength, 4 * maxGramLength): n-gram precision statistics against the source</li>
- * <li>sentCountIndex (4 * maxGramLength): number of sentences</li>
- * <li>the next three slots: syllable counts of source, candidate, reference</li>
- * <li>the last three slots: token counts of source, candidate, reference</li>
- * </ul>
- */
-public class GradeLevelBLEU extends BLEU {
-  private static final Logger logger = Logger.getLogger(GradeLevelBLEU.class.getName());
-
-  // syllable pattern matches /C*V+/
-  private static final Pattern syllable = Pattern.compile("([^aeiouy]*[aeiouy]+)");
-  private static final Pattern silentE = Pattern.compile("[^aeiou]e$");
-  private static final int SOURCE = 0, CANDIDATE = 1, REFERENCE = 2;
-  private int srcIndex = 1, sentCountIndex;
-  private SourceBLEU srcBLEU;
-  private double targetGL = 9.87; // tune.simp avg GL = 9.8704 (tune.en = 14.0785)
-  private double alpha = 0.9;
-  private boolean useTarget = true;
-  private boolean useBLEUplus = true;
-
-  public GradeLevelBLEU() {
-    super();
-  }
-
-  /**
-   * Creates the metric from three options: target grade level, alpha, and the path of the file
-   * holding the source sentences. The BLEU options are assumed to be "4 closest".
-   * <ul>
-   * <li>target == 0 : use the default target</li>
-   * <li>target &gt; 0 : use that target</li>
-   * <li>target &lt; 0 : use the grade level of the source as the target</li>
-   * </ul>
-   * A non-positive alpha keeps the default alpha.
-   *
-   * @param options {target grade level, alpha, source file path}
-   */
-  public GradeLevelBLEU(String[] options) {
-    super();
-    double requestedTarget = Double.parseDouble(options[0]);
-    if (requestedTarget > 0) {
-      targetGL = requestedTarget;
-    } else if (requestedTarget < 0) {
-      useTarget = false;
-    }
-
-    double requestedAlpha = Double.parseDouble(options[1]);
-    if (requestedAlpha > 0) alpha = requestedAlpha;
-
-    try {
-      loadSources(options[2]);
-    } catch (IOException e) {
-      logger.severe("Error loading the source sentences from " + options[2]);
-      System.exit(1);
-    }
-    if (useBLEUplus) srcBLEU = new SourceBLEU(4, "closest", srcIndex, true);
-    initialize();
-  }
-
-  /**
-   * Reads up to numSentences source sentences from the given file and builds a reference table
-   * in which each source sentence follows the existing references of its sentence (a hacky way
-   * to add the source sentence as the last reference sentence, in accordance with SourceBLEU).
-   * <p>
-   * NOTE(review): the table built here is never stored anywhere, so as written this method only
-   * validates that the file can be read. Left unchanged because replacing refSentences would
-   * also affect every other user of the reference table -- confirm the intended behavior.
-   *
-   * @param filepath file with one source sentence per line
-   * @throws IOException if the file cannot be opened or read
-   */
-  public void loadSources(String filepath) throws IOException {
-    String[][] newRefSentences = new String[numSentences][refsPerSen + 1];
-    BufferedReader br = new BufferedReader(new FileReader(filepath));
-    try {
-      String line;
-      int i = 0;
-      while (i < numSentences && (line = br.readLine()) != null) {
-        for (int r = 0; r < refsPerSen; ++r) {
-          newRefSentences[i][r] = refSentences[i][r];
-        }
-        newRefSentences[i][refsPerSen] = line.trim();
-        i++;
-      }
-    } finally {
-      // close the reader even when reading fails part-way through
-      br.close();
-    }
-  }
-
-  public void initialize() {
-    metricName = "GL_BLEU";
-    effLengthMethod = EffectiveLengthMethod.SHORTEST;
-    toBeMinimized = false;
-    suffStatsCount = 4 * maxGramLength + 7;
-    sentCountIndex = 4 * maxGramLength;
-    set_weightsArray();
-    set_maxNgramCounts();
-  }
-
-  /**
-   * Computes the sufficient statistics of one candidate: reference BLEU statistics, source BLEU
-   * statistics, and the token/syllable counts needed for the grade level.
-   *
-   * @param cand_str the candidate translation
-   * @param i index of the sentence the candidate belongs to
-   * @return an array of length suffStatsCount laid out as described on the class
-   */
-  public int[] suffStats(String cand_str, int i) {
-    int[] stats = new int[suffStatsCount];
-
-    String[] candidate_tokens = null;
-
-    if (!cand_str.equals("")) {
-      candidate_tokens = cand_str.split("\\s+");
-    } else {
-      candidate_tokens = new String[0];
-      stats[tokenLength(CANDIDATE)] = 0;
-      stats[tokenLength(REFERENCE)] = effLength(0, i);
-    }
-    // set the BLEU stats
-    set_prec_suffStats(stats, candidate_tokens, i);
-
-    // set source BLEU stats
-    if (useBLEUplus) {
-      int[] src_prec_suffStats = srcBLEU.suffStats(cand_str, i);
-      for (int j = 0; j < src_prec_suffStats.length; j++) {
-        stats[2 * maxGramLength + j] = src_prec_suffStats[j];
-      }
-    }
-
-    // now set the readability stats; these writes deliberately come last because they overwrite
-    // any slots past 4 * maxGramLength that the copy above may have filled
-    String[] reference_tokens = refSentences[i][0].split("\\s+");
-    String[] source_tokens = refSentences[i][srcIndex].split("\\s+");
-
-    // set the number of sentences (necessary to calculate GL)
-    stats[sentCountIndex] = 1;
-    // token length
-    stats[tokenLength(CANDIDATE)] = candidate_tokens.length;
-    stats[tokenLength(REFERENCE)] = reference_tokens.length;
-    stats[tokenLength(SOURCE)] = source_tokens.length;
-
-    // syllable length
-    stats[syllableLength(CANDIDATE)] = countTotalSyllables(candidate_tokens);
-    stats[syllableLength(REFERENCE)] = countTotalSyllables(reference_tokens);
-    stats[syllableLength(SOURCE)] = countTotalSyllables(source_tokens);
-
-    return stats;
-  }
-
-  // create methods for accessing the indices to reduce possible human error
-  private int tokenLength(int whichSentence) {
-    return suffStatsCount - 3 + whichSentence;
-  }
-
-  private int syllableLength(int whichSentence) {
-    return suffStatsCount - 6 + whichSentence;
-  }
-
-  /**
-   * Builds the statistics array for the source-side BLEU: the 2 * maxGramLength source precision
-   * statistics followed by the candidate length and the source length.
-   * <p>
-   * NOTE(review): this presumably matches the layout SourceBLEU.score(int[]) expects (precision
-   * statistics, then candidate length, then reference length) -- confirm against SourceBLEU.
-   */
-  private int[] sourceBleuStats(int[] stats) {
-    int precCount = 2 * maxGramLength;
-    int[] srcStats = new int[precCount + 2];
-    System.arraycopy(stats, precCount, srcStats, 0, precCount);
-    srcStats[precCount] = stats[tokenLength(CANDIDATE)];
-    srcStats[precCount + 1] = stats[tokenLength(SOURCE)];
-    return srcStats;
-  }
-
-  /**
-   * Sums the syllable counts of all tokens of a "sentence".
-   *
-   * @param ss the tokens
-   * @return the total syllable count (0 for an empty array)
-   */
-  public int countTotalSyllables(String[] ss) {
-    int count = 0;
-    for (String s : ss) {
-      count += countSyllables(s);
-    }
-    return count;
-  }
-
-  /**
-   * Counts the syllables in a "word". Every token costs at least one syllable, so punctuation
-   * and similar tokens are not free. Hyphenated words are split at the hyphens and their parts
-   * counted separately.
-   *
-   * @param s the token
-   * @return the syllable count, always at least 1
-   */
-  public int countSyllables(String s) {
-    if (s.equals("-")) {
-      return 1;
-    }
-    // if the word is hyphenated, split at the hyphen before counting
-    // syllables
-    if (s.contains("-")) {
-      int count = 0;
-      for (String t : s.split("-")) {
-        count += countSyllables(t);
-      }
-      // a token made only of hyphens (e.g. "--") splits into nothing; still charge one syllable
-      return Math.max(count, 1);
-    }
-
-    int count = 0;
-    Matcher m = syllable.matcher(s);
-    while (m.find())
-      count++;
-    // subtract 1 if the word ends in a silent e
-    m = silentE.matcher(s);
-    if (m.find()) count--;
-    if (count <= 0) count = 1;
-    return count;
-  }
-
-  /**
-   * Scores the statistics as readabilityPenalty * BLEU, where BLEU is replaced by BLEU+ (see
-   * BLEU_plus) when source BLEU is enabled, and the penalty compares the candidate grade level
-   * with either the fixed target or the source grade level.
-   *
-   * @param stats sufficient statistics of length suffStatsCount
-   * @return the GL_BLEU score
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      logger.severe("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. "
-          + suffStatsCount + ") in GradeLevelBLEU.score(int[])");
-      System.exit(2);
-    }
-    double BLEUscore = super.score(stats);
-    double candGL =
-        gradeLevel(stats[tokenLength(CANDIDATE)], stats[syllableLength(CANDIDATE)],
-            stats[sentCountIndex]);
-    double readabilityPenalty = 1;
-
-    if (useTarget) {
-      readabilityPenalty = getReadabilityPenalty(candGL, targetGL);
-    } else {
-      double srcGL =
-          gradeLevel(stats[tokenLength(SOURCE)], stats[syllableLength(SOURCE)],
-              stats[sentCountIndex]);
-      readabilityPenalty = getReadabilityPenalty(candGL, srcGL);
-    }
-
-    if (useBLEUplus) {
-      // The previous code wrote one past the end of a 2 * maxGramLength array (always throwing
-      // ArrayIndexOutOfBoundsException) and then scored the full stats array instead.
-      double srcBLEUscore = srcBLEU.score(sourceBleuStats(stats));
-      BLEUscore = BLEU_plus(BLEUscore, srcBLEUscore);
-    }
-    return readabilityPenalty * BLEUscore;
-  }
-
-  /**
-   * Flesch-Kincaid Grade Level
-   * (http://en.wikipedia.org/wiki/Flesch-Kincaid_readability_test), clamped below at 0.
-   *
-   * @param numWords number of words
-   * @param numSyllables number of syllables
-   * @param numSentences number of sentences
-   * @return the grade level; 0 when there are no words or no sentences
-   */
-  public double gradeLevel(int numWords, int numSyllables, int numSentences) {
-    if (numWords <= 0 || numSentences <= 0) {
-      // avoid NaN/Infinity from dividing by zero on empty input
-      return 0;
-    }
-    double d = 0.39 * numWords / numSentences + 11.8 * numSyllables / numWords - 15.19;
-    if (d < 0) d = 0;
-    return d;
-  }
-
-  // calculate BLEU+ (per submitted paper CCB reviewed)
-  private double BLEU_plus(double bleu_ref, double bleu_src) {
-    return alpha * bleu_ref - (1 - alpha) * bleu_src;
-  }
-
-  // 1.0 when the grade level is strictly below the target, otherwise 0.0
-  private double getReadabilityPenalty(double this_gl, double target_gl) {
-    if (this_gl < target_gl) return 1.0;
-    return 0.0;
-  }
-
-  /**
-   * Prints the GL_BLEU score together with its components: reference BLEU, source BLEU, BLEU+,
-   * the three grade levels and the readability penalty.
-   *
-   * @param stats sufficient statistics of length suffStatsCount
-   * @param oneLiner true to print everything on one line, false for one value per line
-   */
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    DecimalFormat df = new DecimalFormat("#.###");
-    double source_gl =
-        gradeLevel(stats[tokenLength(SOURCE)], stats[syllableLength(SOURCE)], stats[sentCountIndex]);
-    double cand_gl =
-        gradeLevel(stats[tokenLength(CANDIDATE)], stats[syllableLength(CANDIDATE)],
-            stats[sentCountIndex]);
-    double ref_gl =
-        gradeLevel(stats[tokenLength(REFERENCE)], stats[syllableLength(REFERENCE)],
-            stats[sentCountIndex]);
-    double penalty = 1;
-    double bleu_ref = super.score(stats);
-    // same source-side statistics as score(int[]) uses, so the printed parts add up
-    double bleu_src = srcBLEU.score(sourceBleuStats(stats));
-    double bleu_plus = BLEU_plus(bleu_ref, bleu_src);
-
-    if (useTarget)
-      penalty = getReadabilityPenalty(cand_gl, targetGL);
-    else
-      penalty = getReadabilityPenalty(cand_gl, source_gl);
-
-    if (oneLiner) {
-      System.out.print("GL_BLEU=" + df.format(score(stats)));
-      System.out.print(" BLEU=" + df.format(bleu_ref));
-      System.out.print(" BLEU_src=" + df.format(bleu_src));
-      System.out.print(" iBLEU=" + df.format(bleu_plus));
-      System.out.print(" GL_cand=" + df.format(cand_gl));
-      System.out.print(" GL_src=" + df.format(source_gl));
-      System.out.print(" GL_ref=" + df.format(ref_gl));
-      System.out.print(" Read_penalty=" + df.format(penalty));
-      System.out.println();
-    } else {
-      System.out.println("GL_BLEU      = " + df.format(score(stats)));
-      System.out.println("BLEU         = " + df.format(bleu_ref));
-      System.out.println("BLEU_src     = " + df.format(bleu_src));
-      System.out.println("iBLEU        = " + df.format(bleu_plus));
-      System.out.println("GL_cand      = " + df.format(cand_gl));
-      System.out.println("GL_src       = " + df.format(source_gl));
-      System.out.println("GL_ref       = " + df.format(ref_gl));
-      System.out.println("Read penalty = " + df.format(penalty));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/METEOR.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/METEOR.java b/src/joshua/metrics/METEOR.java
deleted file mode 100644
index d94599b..0000000
--- a/src/joshua/metrics/METEOR.java
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-
-import joshua.util.StreamGobbler;
-
-
-/**
- * Evaluation metric that obtains its sufficient statistics by running an external "./meteor"
- * program on temporary hypothesis and reference files in the working directory.
- * <p>
- * NOTE(review): score(int[]) is a stub that always returns 0.0, and the statistics are read from
- * "TER_out.ter", which looks like a leftover from the TER metric -- confirm the real output file
- * of the meteor wrapper before relying on this class.
- */
-public class METEOR extends EvaluationMetric {
-  protected String targetLanguage;
-  protected boolean normalize;
-  protected boolean keepPunctuation;
-  private int maxComputations;
-
-  /**
-   * Creates the metric from four options.
-   *
-   * @param Metric_options
-   *        [0]: -l language, one of {en,cz,fr,de,es};
-   *        [1]: -normalize, one of {norm_yes,norm_no};
-   *        [2]: -keepPunctuation, one of {keepPunc,removePunc} (the historical spelling
-   *        "removePunk" is accepted as well);
-   *        [3]: maxComputations, positive integer
-   */
-  public METEOR(String[] Metric_options) {
-    // default in meteor v0.8: en, norm_no, removePunc
-
-    String language = Metric_options[0];
-    if (language.equals("en") || language.equals("cz") || language.equals("fr")
-        || language.equals("de") || language.equals("es")) {
-      targetLanguage = language;
-    } else {
-      System.out.println("Unknown language string " + Metric_options[0] + ".");
-      System.out.println("Should be one of {en,cz,fr,de,es}.");
-      System.exit(1);
-    }
-
-    if (Metric_options[1].equals("norm_yes")) {
-      normalize = true;
-    } else if (Metric_options[1].equals("norm_no")) {
-      normalize = false;
-    } else {
-      System.out.println("Unknown normalize string " + Metric_options[1] + ".");
-      System.out.println("Should be one of norm_yes or norm_no.");
-      System.exit(1);
-    }
-
-    // This branch used to test Metric_options[1], so the remove-punctuation setting could never
-    // be selected; it must look at option [2].
-    if (Metric_options[2].equals("keepPunc")) {
-      keepPunctuation = true;
-    } else if (Metric_options[2].equals("removePunc") || Metric_options[2].equals("removePunk")) {
-      keepPunctuation = false;
-    } else {
-      System.out.println("Unknown keepPunctuation string " + Metric_options[2] + ".");
-      System.out.println("Should be one of keepPunc or removePunc.");
-      System.exit(1);
-    }
-
-    maxComputations = Integer.parseInt(Metric_options[3]);
-    if (maxComputations < 1) {
-      System.out.println("Maximum computations must be positive");
-      System.exit(2);
-    }
-
-    initialize(); // set the data members of the metric
-  }
-
-  protected void initialize() {
-    metricName = "METEOR";
-    toBeMinimized = false;
-    suffStatsCount = 5;
-  }
-
-  public double bestPossibleScore() {
-    return 1.0;
-  }
-
-  public double worstPossibleScore() {
-    return 0.0;
-  }
-
-  public int[] suffStats(String cand_str, int i) {
-    // this method should never be used when the metric is METEOR,
-    // because METEOR.java overrides suffStats(String[],int[]) below,
-    // which is the only method that calls suffStats(String,int).
-    return null;
-  }
-
-  /**
-   * Calculates sufficient statistics for each sentence in an arbitrary set of candidates by
-   * writing them (and their references) to files, launching the external meteor program, and
-   * parsing five numbers per candidate from its output file.
-   * <p>
-   * An IOException or InterruptedException is reported on System.err and terminates the JVM.
-   *
-   * @param cand_strings candidate translations
-   * @param cand_indices for each candidate, the index of the sentence it translates
-   * @return one row of five statistics per candidate, or null (after printing a message) if the
-   *         two argument arrays differ in length
-   */
-  public int[][] suffStats(String[] cand_strings, int[] cand_indices) {
-    int candCount = cand_strings.length;
-    if (cand_indices.length != candCount) {
-      System.out.println("Array lengths mismatch in suffStats(String[],int[]); returning null.");
-      return null;
-    }
-
-    int[][] stats = new int[candCount][suffStatsCount];
-
-    try {
-
-      // 1) Create input files for meteor
-
-      // 1a) Create hypothesis file
-      BufferedWriter outFile = openUtf8Writer("hyp.txt.METEOR");
-      try {
-        for (int d = 0; d < candCount; ++d) {
-          writeLine(cand_strings[d], outFile);
-        }
-      } finally {
-        outFile.close();
-      }
-
-      // 1b) Create reference file
-      outFile = openUtf8Writer("ref.txt.METEOR");
-      try {
-        for (int d = 0; d < candCount; ++d) {
-          for (int r = 0; r < refsPerSen; ++r) {
-            writeLine(refSentences[cand_indices[d]][r], outFile);
-          }
-        }
-      } finally {
-        outFile.close();
-      }
-
-      // 2) Launch meteor as an external process
-
-      String cmd_str = "./meteor hyp.txt.METEOR ref.txt.METEOR";
-      cmd_str += " -l " + targetLanguage;
-      cmd_str += " -r " + refsPerSen;
-      if (normalize) {
-        cmd_str += " -normalize";
-      }
-      if (keepPunctuation) {
-        cmd_str += " -keepPunctuation";
-      }
-      cmd_str += " -ssOut";
-
-      Runtime rt = Runtime.getRuntime();
-      Process p = rt.exec(cmd_str);
-
-      // drain both streams so the child process cannot block on a full pipe
-      StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), 0);
-      StreamGobbler outputGobbler = new StreamGobbler(p.getInputStream(), 0);
-
-      errorGobbler.start();
-      outputGobbler.start();
-
-      p.waitFor();
-
-
-      // 3) Read SS from output file produced by meteor
-      // NOTE(review): "TER_out.ter" looks copied from the TER metric -- confirm the file name.
-
-      BufferedReader inFile = new BufferedReader(new FileReader("TER_out.ter"));
-      try {
-        inFile.readLine(); // skip hyp line
-        inFile.readLine(); // skip ref line
-
-        for (int d = 0; d < candCount; ++d) {
-          String line = inFile.readLine(); // read info
-          if (line == null) {
-            // report a truncated file through the IOException path instead of a bare NPE
-            throw new IOException("meteor output ended after " + d + " of " + candCount
-                + " candidates");
-          }
-          String[] strA = line.split("\\s+");
-
-          for (int s = 0; s < 5; ++s) {
-            stats[d][s] = (int) Double.parseDouble(strA[s]);
-          }
-        }
-      } finally {
-        inFile.close();
-      }
-    } catch (IOException e) {
-      System.err.println("IOException in METEOR.suffStats(String[],int[]): " + e.getMessage());
-      System.exit(99902);
-    } catch (InterruptedException e) {
-      System.err.println("InterruptedException in METEOR.suffStats(String[],int[]): "
-          + e.getMessage());
-      System.exit(99903);
-    }
-
-    return stats;
-  }
-
-  /**
-   * Validates the statistics array and returns the score.
-   * <p>
-   * NOTE(review): the score computation was never filled in; this always returns 0.0.
-   *
-   * @param stats sufficient statistics of length suffStatsCount
-   * @return 0.0
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      System.out.println("Mismatch between stats.length and suffStatsCount (" + stats.length
-          + " vs. " + suffStatsCount + ") in METEOR.score(int[])");
-      System.exit(1);
-    }
-
-    double sc = 0.0;
-
-    // sc = ???
-
-    return sc;
-  }
-
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    if (oneLiner) {
-      // closing parenthesis added so the printed expression is balanced
-      System.out.println("METEOR = METEOR(" + stats[0] + "," + stats[1] + "," + stats[2] + ","
-          + stats[3] + "," + stats[4] + ") = " + score(stats));
-    } else {
-      System.out.println("# matches = " + stats[0]);
-      System.out.println("test length = " + stats[1]);
-      System.out.println("ref length = " + stats[2]);
-      System.out.println("# chunks = " + stats[3]);
-      System.out.println("length cost = " + stats[4]);
-      System.out.println("METEOR = " + score(stats));
-    }
-  }
-
-  // opens the named file for writing as UTF-8, truncating any existing content
-  private BufferedWriter openUtf8Writer(String fileName) throws IOException {
-    FileOutputStream outStream = new FileOutputStream(fileName, false); // false: don't append
-    return new BufferedWriter(new OutputStreamWriter(outStream, "utf8"));
-  }
-
-  // writes one line followed by the platform line separator and flushes immediately
-  private void writeLine(String line, BufferedWriter writer) throws IOException {
-    writer.write(line, 0, line.length());
-    writer.newLine();
-    writer.flush();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/MinimumChangeBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/MinimumChangeBLEU.java b/src/joshua/metrics/MinimumChangeBLEU.java
deleted file mode 100644
index fa764c3..0000000
--- a/src/joshua/metrics/MinimumChangeBLEU.java
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.logging.Logger;
-
-import joshua.util.Algorithms;
-
-/**
- * BLEU variant for paraphrasing runs that penalizes candidates which change too little: the
- * BLEU score is scaled by a penalty based on the word error rate (word-level Levenshtein
- * distance divided by candidate length) between the candidate and the source sentence.
- * <p>
- * The source sentence is assumed to be one of the references (at sourceReferenceIndex) and is
- * excluded from the n-gram counts and the effective reference length.
- * <p>
- * Sufficient statistics: the 2 * maxGramLength BLEU precision statistics, followed by candidate
- * length, effective reference length, and edit distance to the source.
- */
-public class MinimumChangeBLEU extends BLEU {
-  private static final Logger logger = Logger.getLogger(MinimumChangeBLEU.class.getName());
-
-  // we assume that the source for the paraphrasing run is
-  // part of the set of references
-  private int sourceReferenceIndex;
-  private double thresholdWER;
-
-
-  public MinimumChangeBLEU() {
-    super();
-    this.sourceReferenceIndex = 0;
-    this.thresholdWER = 0.3;
-    initialize();
-  }
-
-
-  /**
-   * @param options the BLEU options in [0] and [1], the index of the source sentence among the
-   *        references in [2], and the WER threshold in [3]
-   */
-  public MinimumChangeBLEU(String[] options) {
-    super(options);
-    this.sourceReferenceIndex = Integer.parseInt(options[2]);
-    this.thresholdWER = Double.parseDouble(options[3]);
-    initialize();
-  }
-
-
-  protected void initialize() {
-    metricName = "MC_BLEU";
-    toBeMinimized = false;
-    // adding 1 to the sufficient stats for regular BLEU
-    suffStatsCount = 2 * maxGramLength + 3;
-
-    set_weightsArray();
-    set_maxNgramCounts();
-  }
-
-
-  /**
-   * Builds, for every sentence, the maximum count of each n-gram over all references except the
-   * source reference, and caches the word counts of those references for effLength.
-   */
-  protected void set_maxNgramCounts() {
-    @SuppressWarnings("unchecked")
-    HashMap<String, Integer>[] temp_HMA = new HashMap[numSentences];
-    maxNgramCounts = temp_HMA;
-
-    String gram = "";
-    int oldCount = 0, nextCount = 0;
-
-    for (int i = 0; i < numSentences; ++i) {
-      // update counts as necessary from the reference translations
-      for (int r = 0; r < refsPerSen; ++r) {
-        // skip source reference
-        if (r == this.sourceReferenceIndex) continue;
-        if (maxNgramCounts[i] == null) {
-          maxNgramCounts[i] = getNgramCountsAll(refSentences[i][r]);
-        } else {
-          HashMap<String, Integer> nextNgramCounts = getNgramCountsAll(refSentences[i][r]);
-          for (Map.Entry<String, Integer> entry : nextNgramCounts.entrySet()) {
-            gram = entry.getKey();
-            nextCount = entry.getValue();
-
-            if (maxNgramCounts[i].containsKey(gram)) {
-              oldCount = maxNgramCounts[i].get(gram);
-              if (nextCount > oldCount) {
-                maxNgramCounts[i].put(gram, nextCount);
-              }
-            } else { // add it
-              maxNgramCounts[i].put(gram, nextCount);
-            }
-          }
-        }
-      } // for (r)
-    } // for (i)
-
-    // for efficiency, calculate the reference lengths, which will be used
-    // in effLength...
-    refWordCount = new int[numSentences][refsPerSen];
-    for (int i = 0; i < numSentences; ++i) {
-      for (int r = 0; r < refsPerSen; ++r) {
-        if (r == this.sourceReferenceIndex) continue;
-        refWordCount[i][r] = wordCount(refSentences[i][r]);
-      }
-    }
-  }
-
-
-  /**
-   * Computes the sufficient statistics of one candidate. A trailing "_OOV" marker is stripped
-   * from every candidate word first.
-   *
-   * @param cand_str the candidate translation
-   * @param i index of the sentence the candidate belongs to
-   * @return the BLEU precision statistics followed by candidate length, effective reference
-   *         length, and word-level Levenshtein distance to the source sentence
-   */
-  public int[] suffStats(String cand_str, int i) {
-    int[] stats = new int[suffStatsCount];
-
-    String[] candidate_words;
-    if (!cand_str.equals(""))
-      candidate_words = cand_str.split("\\s+");
-    else
-      candidate_words = new String[0];
-
-    // dropping "_OOV" marker
-    for (int j = 0; j < candidate_words.length; j++) {
-      if (candidate_words[j].endsWith("_OOV"))
-        candidate_words[j] = candidate_words[j].substring(0, candidate_words[j].length() - 4);
-    }
-
-    set_prec_suffStats(stats, candidate_words, i);
-    String[] source_words = refSentences[i][sourceReferenceIndex].split("\\s+");
-    stats[suffStatsCount - 1] = Algorithms.levenshtein(candidate_words, source_words);
-    stats[suffStatsCount - 2] = effLength(candidate_words.length, i);
-    stats[suffStatsCount - 3] = candidate_words.length;
-
-    return stats;
-  }
-
-
-  /**
-   * Returns the effective reference length for a candidate, ignoring the source reference.
-   *
-   * @param candLength number of words in the candidate
-   * @param i index of the sentence
-   * @return for CLOSEST, the reference length nearest to candLength (the shorter one on ties);
-   *         for SHORTEST, the smallest reference length; otherwise candLength
-   */
-  public int effLength(int candLength, int i) {
-    if (effLengthMethod == EffectiveLengthMethod.CLOSEST) {
-      int closestRefLength = Integer.MIN_VALUE;
-      // Start from the largest possible difference. The previous seed,
-      // Math.abs(candLength - Integer.MIN_VALUE), overflows; for an empty candidate it came out
-      // negative, so no reference was ever selected and Integer.MIN_VALUE was returned.
-      int minDiff = Integer.MAX_VALUE;
-
-      for (int r = 0; r < refsPerSen; ++r) {
-        if (r == this.sourceReferenceIndex) continue;
-        int nextRefLength = refWordCount[i][r];
-        int nextDiff = Math.abs(candLength - nextRefLength);
-
-        if (nextDiff < minDiff || (nextDiff == minDiff && nextRefLength < closestRefLength)) {
-          closestRefLength = nextRefLength;
-          minDiff = nextDiff;
-        }
-      }
-      return closestRefLength;
-    } else if (effLengthMethod == EffectiveLengthMethod.SHORTEST) {
-      int shortestRefLength = Integer.MAX_VALUE;
-
-      for (int r = 0; r < refsPerSen; ++r) {
-        if (r == this.sourceReferenceIndex) continue;
-
-        int nextRefLength = refWordCount[i][r];
-        if (nextRefLength < shortestRefLength) {
-          shortestRefLength = nextRefLength;
-        }
-      }
-      return shortestRefLength;
-    }
-
-    return candLength; // should never get here anyway
-  }
-
-
-  /**
-   * Scores the statistics as wer_penalty * brevity_penalty * exp(weighted log precisions). The
-   * WER penalty is 1 once the word error rate against the source reaches thresholdWER and
-   * shrinks linearly to 0 below it.
-   *
-   * @param stats sufficient statistics of length suffStatsCount
-   * @return the MC_BLEU score
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      logger.severe("Mismatch between stats.length and " + "suffStatsCount (" + stats.length
-          + " vs. " + suffStatsCount + ") in BLEU.score(int[])");
-      System.exit(2);
-    }
-
-    double accuracy = 0.0;
-    double smooth_addition = 1.0; // following bleu-1.04.pl
-    double c_len = stats[suffStatsCount - 3];
-    double r_len = stats[suffStatsCount - 2];
-
-    double wer = stats[suffStatsCount - 1] / c_len;
-    double wer_penalty = (wer >= thresholdWER) ? 1.0 : (wer / thresholdWER);
-
-    double correctGramCount, totalGramCount;
-
-    for (int n = 1; n <= maxGramLength; ++n) {
-      correctGramCount = stats[2 * (n - 1)];
-      totalGramCount = stats[2 * (n - 1) + 1];
-
-      double prec_n;
-      if (totalGramCount > 0) {
-        prec_n = correctGramCount / totalGramCount;
-      } else {
-        prec_n = 1; // following bleu-1.04.pl ???????
-      }
-
-      if (prec_n == 0) {
-        smooth_addition *= 0.5;
-        prec_n = smooth_addition / (c_len - n + 1);
-        // isn't c_len-n+1 just totalGramCount ???????
-      }
-      accuracy += weights[n] * Math.log(prec_n);
-    }
-    double brevity_penalty = 1.0;
-    if (c_len < r_len) brevity_penalty = Math.exp(1 - (r_len / c_len));
-
-    return wer_penalty * brevity_penalty * Math.exp(accuracy);
-  }
-
-
-  /**
-   * Prints the WER penalty and the MC_BLEU score.
-   *
-   * @param stats sufficient statistics of length suffStatsCount
-   * @param oneLiner not used by this metric; the output is the same either way
-   */
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    // floating-point division, as in score(int[]); integer division truncated the rate to a
-    // whole number and threw ArithmeticException for an empty candidate
-    double wer = stats[suffStatsCount - 1] / (double) stats[suffStatsCount - 3];
-    double wer_penalty = (wer >= thresholdWER) ? 1.0d : (wer / thresholdWER);
-
-    System.out.println("WER_penalty = " + wer_penalty);
-    System.out.println("MC_BLEU= " + score(stats));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/NewMetric.java.template
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/NewMetric.java.template b/src/joshua/metrics/NewMetric.java.template
deleted file mode 100644
index 3b8ed83..0000000
--- a/src/joshua/metrics/NewMetric.java.template
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-import java.math.*;
-import java.util.*;
-import java.io.*;
-
-***(1)***
-public class __new_metric_CLASS_name__ extends EvaluationMetric
-{
-  /********************************************
-    private data members for this error metric
-  ********************************************/
-
-  ***(2)***
-  private ;
-  private ;
-  private ;
-
-  /*
-     You already have access to these data members of the parent
-     class (EvaluationMetric):
-         int numSentences;
-           number of sentences in the MERT set
-         int refsPerSen;
-           number of references per sentence
-         String[][] refSentences;
-           refSentences[i][r] stores the r'th reference of the i'th
-           source sentence (both indices are 0-based)
-  */
-  /********************************************
-  ********************************************/
-
-  public constructorNameMustMatchClassName(String[] Metric_options)
-  {
-
-                ***(3)***
-
-    //
-    //
-    // process the Metric_options array
-    //
-    //
-
-    initialize(); // set the data members of the metric
-  }
-
-  protected void initialize()
-  {
-    ***(4)***
-    metricName = "XXXXXXXX";    <- pick a metric name
-    toBeMinimized = true/false; <- should it be minimized?
-    suffStatsCount = ???;       <- how many sufficient statistics does the metric need?
-
-    ***(5)***
-    /* here you make calls to any methods that set the data members */
-    /* here you make calls to any methods that set the data members */
-    /* here you make calls to any methods that set the data members */
-  }
-
-  ***(6)***
-  public double bestPossibleScore() { return ???; }
-    --> what's the best score of the metric? <--
-  public double worstPossibleScore() { return ???; }
-    --> what's the worst score of the metric? <--
-
-  ***(7)***
-  /* here you define any methods that set the data members */
-  /* here you define any methods that set the data members */
-  /* here you define any methods that set the data members */
-
-  ***(8)***
-  public int[] suffStats(String cand_str, int i) throws Exception
-  {
-    int[] stats = new int[suffStatsCount];
-
-    //
-    //
-    // set contents of stats[] here!
-    //
-    //
-
-    return stats;
-  }
-
-  ***(9a)***
-  public double score(int[] stats)
-  {
-    if (stats.length != suffStatsCount) {
-      System.out.println("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. " + suffStatsCount + ") in NewMetric.score(int[])");
-      System.exit(1);
-    }
-
-    double sc = 0.0;
-
-    //
-    //
-    // set sc here!
-    //
-    //
-
-    return sc;
-  }
-
-  ***(9b)***
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner)
-  {
-    System.out.println(metricName + " = " + score(stats));
-
-    //
-    //
-    // optional (for debugging purposes)
-    //
-    //
-  }
-
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/Precis.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/Precis.java b/src/joshua/metrics/Precis.java
deleted file mode 100644
index 82f4106..0000000
--- a/src/joshua/metrics/Precis.java
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.logging.Logger;
-
-import joshua.util.Algorithms;
-
-// The metric re-uses most of the BLEU code
-/**
- * BLEU variant that additionally scores how well a candidate compresses its source sentence.
- *
- * <p>The score is the BLEU score (smoothed n-gram precisions and brevity penalty) multiplied by a
- * verbosity penalty for candidates that are longer than the target compression rate allows, minus
- * an optional penalty for candidates that stay too close to the source.
- *
- * <p>Layout of the sufficient statistics, with {@code m = maxGramLength}:
- * <ul>
- * <li>{@code 2 * (n - 1)} and {@code 2 * (n - 1) + 1}: correct and total n-gram counts for
- * {@code n = 1..m}</li>
- * <li>{@code 2m}: candidate length in tokens</li>
- * <li>{@code 2m + 1}: effective reference length in tokens</li>
- * <li>{@code 2m + 2}: source length in tokens</li>
- * <li>character-based mode only: candidate, effective reference and source length in
- * characters</li>
- * <li>last slot: Levenshtein distance between candidate and source tokens (left at 0 when the
- * similarity weight is 0)</li>
- * </ul>
- * In token-based mode the three slots before the last one are the token lengths themselves, which
- * is why the compression lengths can always be read from {@code suffStatsCount - 4} to
- * {@code suffStatsCount - 2}.
- */
-public class Precis extends BLEU {
-  private static final Logger logger = Logger.getLogger(Precis.class.getName());
-
-  // Marker value for "use the compression rate of the (closest) reference".
-  private static final double REF_CR = -1.0;
-
-  // Number of entries read from the options array: the BLEU arguments (that's 2)
-  // plus 4 of our own.
-  private static final int OPTION_COUNT = 6;
-
-  // We assume that the source for the paraphrasing run is
-  // part of the set of references, this is its index.
-  private int sourceReferenceIndex;
-
-  // A global target compression rate to achieve
-  // if negative, we default to locally aiming for the compression
-  // rate given by the (closest) reference compression?
-  private double targetCompressionRate;
-
-  // Are we optimizing for character-based compression (as opposed
-  // to token-based)?
-  private boolean characterBased;
-
-  // Weight for factoring in Levenshtein distance to source as a penalty for
-  // insufficient change.
-  private double similarityWeight;
-
-  /**
-   * Creates a token-based metric with source reference index 0, target compression rate 0 and no
-   * similarity penalty.
-   */
-  public Precis() {
-    super();
-    this.sourceReferenceIndex = 0;
-    this.targetCompressionRate = 0;
-    this.characterBased = false;
-    this.similarityWeight = 0;
-    initialize();
-  }
-
-  /**
-   * Creates the metric from its option strings.
-   *
-   * @param options the whole array is passed on to the BLEU constructor; in addition
-   *        {@code options[2]} is the index of the source among the references,
-   *        {@code options[3]} is either {@code "ref"} or a target compression rate in [0, 1],
-   *        {@code options[4]} is {@code "chars"} or {@code "words"}, and {@code options[5]} is
-   *        the similarity weight in [0, 1]
-   * @throws RuntimeException if an option is missing, unknown or out of range
-   */
-  public Precis(String[] options) {
-    super(options);
-    if (options.length < OPTION_COUNT) {
-      throw new IllegalArgumentException("Precis requires " + OPTION_COUNT
-          + " options but received " + options.length);
-    }
-
-    this.sourceReferenceIndex = Integer.parseInt(options[2]);
-
-    if ("ref".equals(options[3])) {
-      targetCompressionRate = REF_CR;
-    } else {
-      targetCompressionRate = Double.parseDouble(options[3]);
-      if (targetCompressionRate > 1 || targetCompressionRate < 0)
-        throw new RuntimeException("Invalid compression ratio requested: " + options[3]);
-    }
-
-    if ("chars".equals(options[4]))
-      this.characterBased = true;
-    else if ("words".equals(options[4]))
-      this.characterBased = false;
-    else
-      throw new RuntimeException("Unknown compression style: " + options[4]);
-
-    similarityWeight = Double.parseDouble(options[5]);
-    if (similarityWeight < 0 || similarityWeight > 1)
-      throw new RuntimeException("Source penalty out of bounds: " + options[5]);
-
-    // Must run after the fields above are set: suffStatsCount depends on characterBased and the
-    // n-gram counts depend on sourceReferenceIndex.
-    initialize();
-  }
-
-  // in addition to BLEU's statistics, we store some length info;
-  // for character-based compression we need to store more (for token-based
-  // BLEU already has us partially covered by storing some num_of_words)
-  //
-  // here's where you'd make additional room for statistics of your own
-  protected void initialize() {
-    metricName = "PRECIS";
-    toBeMinimized = false;
-    // 2 * maxGramLength n-gram counts, followed by 4 slots: candidate, reference and source
-    // length in tokens plus the Levenshtein distance to the source. Character-based
-    // compression needs 3 more slots for the same lengths measured in characters.
-    suffStatsCount = 2 * maxGramLength + 4 + (this.characterBased ? 3 : 0);
-
-    set_weightsArray();
-    set_maxNgramCounts();
-  }
-
-  // The only difference to BLEU here is that we're excluding the input from
-  // the collection of ngram statistics - that's actually up for debate
-  protected void set_maxNgramCounts() {
-    @SuppressWarnings("unchecked")
-    HashMap<String, Integer>[] temp_HMA = new HashMap[numSentences];
-    maxNgramCounts = temp_HMA;
-
-    for (int i = 0; i < numSentences; ++i) {
-      // update counts as necessary from the reference translations
-      for (int r = 0; r < refsPerSen; ++r) {
-        // skip source reference
-        if (r == this.sourceReferenceIndex) continue;
-
-        HashMap<String, Integer> nextNgramCounts = getNgramCountsAll(refSentences[i][r]);
-        if (maxNgramCounts[i] == null) {
-          // first real reference: its counts are the maxima so far
-          maxNgramCounts[i] = nextNgramCounts;
-          continue;
-        }
-
-        // keep, per n-gram, the largest count seen in any reference
-        for (Map.Entry<String, Integer> entry : nextNgramCounts.entrySet()) {
-          Integer oldCount = maxNgramCounts[i].get(entry.getKey());
-          if (oldCount == null || entry.getValue() > oldCount) {
-            maxNgramCounts[i].put(entry.getKey(), entry.getValue());
-          }
-        }
-      } // for (r)
-    } // for (i)
-
-    // for efficiency, calculate the reference lengths, which will be used
-    // in effLength...
-    refWordCount = new int[numSentences][refsPerSen];
-    for (int i = 0; i < numSentences; ++i) {
-      for (int r = 0; r < refsPerSen; ++r) {
-        refWordCount[i][r] = wordCount(refSentences[i][r]);
-      }
-    }
-  }
-
-  /**
-   * Computes the sufficient statistics of one candidate (see the class comment for the layout).
-   *
-   * @param cand_str the candidate sentence
-   * @param i index of the sentence the candidate belongs to
-   * @return an array of {@code suffStatsCount} statistics
-   */
-  public int[] suffStats(String cand_str, int i) {
-    int[] stats = new int[suffStatsCount];
-
-    String[] candidate_words;
-    if (!cand_str.equals(""))
-      candidate_words = cand_str.split("\\s+");
-    else
-      candidate_words = new String[0];
-
-    // Set n-gram precision stats.
-    set_prec_suffStats(stats, candidate_words, i);
-
-    // Same as BLEU.
-    stats[2 * maxGramLength] = candidate_words.length;
-    stats[2 * maxGramLength + 1] = effLength(candidate_words.length, i);
-
-    // Source length in tokens.
-    stats[2 * maxGramLength + 2] = refWordCount[i][sourceReferenceIndex];
-
-    // Character-based compression requires stats in character counts.
-    if (this.characterBased) {
-      // Candidate length in characters (string length minus one separator per word gap).
-      stats[suffStatsCount - 4] = cand_str.length() - candidate_words.length + 1;
-      // Reference length in characters.
-      stats[suffStatsCount - 3] = effLength(stats[suffStatsCount - 4], i, true);
-      // Source length in characters.
-      stats[suffStatsCount - 2] = nonSourceRefLength(i, sourceReferenceIndex, true);
-    }
-
-    // Levenshtein distance to source; only needed when it contributes to the score.
-    if (this.similarityWeight > 0)
-      stats[suffStatsCount - 1] =
-          Algorithms.levenshtein(candidate_words,
-              refSentences[i][sourceReferenceIndex].split("\\s+"));
-
-    return stats;
-  }
-
-  /** Token-based effective reference length; see {@link #effLength(int, int, boolean)}. */
-  public int effLength(int candLength, int i) {
-    return effLength(candLength, i, false);
-  }
-
-  /**
-   * Returns the effective reference length for sentence {@code i}, ignoring the source reference.
-   *
-   * @param candLength candidate length, in the same unit as the requested reference length
-   * @param i index of the sentence
-   * @param character_length {@code true} to measure references in characters, {@code false} to
-   *        measure them in tokens
-   * @return the closest or shortest reference length, depending on {@code effLengthMethod}; if
-   *         there is no reference besides the source, {@code Integer.MIN_VALUE} (closest) or
-   *         {@code Integer.MAX_VALUE} (shortest); {@code candLength} for any other method
-   */
-  public int effLength(int candLength, int i, boolean character_length) {
-    if (effLengthMethod == EffectiveLengthMethod.CLOSEST) {
-      int closestRefLength = Integer.MIN_VALUE;
-      // Start from the largest possible difference. Deriving the start value from
-      // candLength - Integer.MIN_VALUE overflows and, for an empty candidate, produces a
-      // negative difference that no reference can ever beat.
-      int minDiff = Integer.MAX_VALUE;
-
-      for (int r = 0; r < refsPerSen; ++r) {
-        if (r == this.sourceReferenceIndex) continue;
-
-        int nextRefLength = nonSourceRefLength(i, r, character_length);
-        int nextDiff = Math.abs(candLength - nextRefLength);
-
-        // closer reference wins; on a tie the shorter reference wins
-        if (nextDiff < minDiff || (nextDiff == minDiff && nextRefLength < closestRefLength)) {
-          closestRefLength = nextRefLength;
-          minDiff = nextDiff;
-        }
-      }
-      return closestRefLength;
-    } else if (effLengthMethod == EffectiveLengthMethod.SHORTEST) {
-      int shortestRefLength = Integer.MAX_VALUE;
-
-      for (int r = 0; r < refsPerSen; ++r) {
-        if (r == this.sourceReferenceIndex) continue;
-
-        int nextRefLength = nonSourceRefLength(i, r, character_length);
-        if (nextRefLength < shortestRefLength) {
-          shortestRefLength = nextRefLength;
-        }
-      }
-      return shortestRefLength;
-    }
-
-    return candLength; // should never get here anyway
-  }
-
-  // Length of reference r of sentence i, either in tokens or in characters. The character
-  // length is the string length minus one separator per gap between words.
-  private int nonSourceRefLength(int i, int r, boolean character_length) {
-    if (character_length) {
-      return refSentences[i][r].length() - refWordCount[i][r] + 1;
-    }
-    return refWordCount[i][r];
-  }
-
-  /**
-   * Calculates the actual score from the statistics. Logs an error and terminates the JVM with
-   * status 2 if {@code stats} does not have {@code suffStatsCount} entries.
-   *
-   * @param stats sufficient statistics as produced by {@link #suffStats(String, int)}
-   * @return verbosity penalty * brevity penalty * smoothed n-gram precision, minus the weighted
-   *         similarity penalty
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      logger.severe("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. "
-          + suffStatsCount + ") in Precis.score(int[])");
-      System.exit(2);
-    }
-
-    double cnd_len = stats[2 * maxGramLength];
-    double ref_len = stats[2 * maxGramLength + 1];
-    double src_len = stats[2 * maxGramLength + 2];
-    double src_lev = stats[suffStatsCount - 1];
-
-    // this part matches BLEU
-    double accuracy = 0.0;
-    double smooth_addition = 1.0; // following bleu-1.04.pl
-    for (int n = 1; n <= maxGramLength; ++n) {
-      double correctGramCount = stats[2 * (n - 1)];
-      double totalGramCount = stats[2 * (n - 1) + 1];
-
-      double prec_n = (totalGramCount > 0) ? correctGramCount / totalGramCount : 1;
-      if (prec_n == 0) {
-        // smooth zero precisions so that the logarithm stays finite
-        smooth_addition *= 0.5;
-        prec_n = smooth_addition / (cnd_len - n + 1);
-      }
-      accuracy += weights[n] * Math.log(prec_n);
-    }
-
-    double similarity_penalty = similarityWeight * Math.max(0, 1 - src_lev / src_len);
-
-    // We add on our penalties on top of BLEU.
-    return precisVerbosityPenalty(stats) * precisBrevityPenalty(cnd_len, ref_len)
-        * Math.exp(accuracy) - similarity_penalty;
-  }
-
-  // Somewhat not-so-detailed, this is used in the JoshuaEval tool.
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    double cnd_len = stats[2 * maxGramLength];
-    double ref_len = stats[2 * maxGramLength + 1];
-    double src_len = stats[2 * maxGramLength + 2];
-    double src_lev = stats[suffStatsCount - 1];
-
-    double brevity_penalty = precisBrevityPenalty(cnd_len, ref_len);
-    // printed unweighted, next to the weight it is multiplied with
-    double similarity_penalty = Math.max(0, 1 - src_lev / src_len);
-    double verbosity_penalty = precisVerbosityPenalty(stats);
-
-    System.out.println(String.format("Similarity Penalty = %.2f * %.4f", similarityWeight,
-        similarity_penalty));
-    System.out.println(String.format("Verbosity Penalty  = %.4f", verbosity_penalty));
-    System.out.println(String.format("Brevity Penalty    = %.4f", brevity_penalty));
-    System.out.println(String.format("Precis             = %.4f", score(stats)));
-  }
-
-  // BLEU's brevity penalty: 1 unless the candidate is shorter than the reference.
-  private static double precisBrevityPenalty(double cnd_len, double ref_len) {
-    if (cnd_len < ref_len) {
-      return Math.exp(1 - (ref_len / cnd_len));
-    }
-    return 1.0;
-  }
-
-  // Verbosity penalty of the candidate described by stats, measured against either the fixed
-  // target compression rate or the compression rate of the reference.
-  private double precisVerbosityPenalty(int[] stats) {
-    double compression_cnd_len = stats[suffStatsCount - 4];
-    double compression_ref_len = stats[suffStatsCount - 3];
-    double compression_src_len = stats[suffStatsCount - 2];
-
-    double compression_ratio = compression_cnd_len / compression_src_len;
-    double target_rate =
-        (targetCompressionRate == REF_CR
-            ? compression_ref_len / compression_src_len
-            : targetCompressionRate);
-
-    return getVerbosityPenalty(compression_ratio, target_rate);
-  }
-
-  // Returns the score penalty as a function of the achieved and target
-  // compression rates; currently an exponential fall-off to make sure that not
-  // compressing enough is costly.
-  protected static double getVerbosityPenalty(double cr, double target_rate) {
-    if (cr <= target_rate)
-      return 1.0;
-    else {
-      // linear option: (1 - cr) / (1 - compressionRate);
-      // doesn't penalize insufficient compressions hard enough
-      return Math.exp(5 * (target_rate - cr));
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/metrics/PrecisMinusSourceBLEU.java
----------------------------------------------------------------------
diff --git a/src/joshua/metrics/PrecisMinusSourceBLEU.java b/src/joshua/metrics/PrecisMinusSourceBLEU.java
deleted file mode 100644
index f56f8cb..0000000
--- a/src/joshua/metrics/PrecisMinusSourceBLEU.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.metrics;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-
-/**
- * Combined metric: the Precis score minus a weighted SourceBLEU score.
- *
- * <p>The sufficient statistics are those of Precis followed by those of SourceBLEU.
- */
-public class PrecisMinusSourceBLEU extends EvaluationMetric {
-
-  private Precis myPrecis;
-  private SourceBLEU mySourceBLEU;
-
-  // weight of the SourceBLEU score that is subtracted from the Precis score
-  private double bleuWeight;
-
-  // number of leading statistics that belong to Precis / trailing ones that belong to SourceBLEU
-  private int precisCount;
-  private int sourceBleuCount;
-
-  /**
-   * Creates the metric from its option strings.
-   *
-   * @param options the Precis options, except that {@code options[5]} is the weight of the
-   *        SourceBLEU score here; {@code options[0]}, {@code options[1]} and {@code options[2]}
-   *        are also used to set up SourceBLEU. The array is not modified.
-   */
-  public PrecisMinusSourceBLEU(String[] options) {
-    bleuWeight = Double.parseDouble(options[5]);
-
-    // Automatically deactivate Levenshtein penalty for Precis. This is done on a copy so that
-    // the caller's array keeps the weight it passed in.
-    String[] precisOptions = options.clone();
-    precisOptions[5] = "0";
-
-    myPrecis = new Precis(precisOptions);
-    mySourceBLEU =
-        new SourceBLEU(Integer.parseInt(options[0]), options[1], Integer.parseInt(options[2]),
-            false);
-
-    initialize();
-  }
-
-  protected void initialize() {
-    metricName = "PRECIS-SRC_BLEU";
-    toBeMinimized = false;
-    precisCount = myPrecis.suffStatsCount;
-    sourceBleuCount = mySourceBLEU.suffStatsCount;
-    suffStatsCount = precisCount + sourceBleuCount;
-  }
-
-  public double bestPossibleScore() {
-    return 1.0;
-  }
-
-  public double worstPossibleScore() {
-    return -1.0;
-  }
-
-  /**
-   * Not supported for single candidates: always returns {@code null}. Use
-   * {@link #suffStats(String[], int[])} instead.
-   */
-  public int[] suffStats(String cand_str, int i) {
-    return null;
-  }
-
-  /**
-   * Computes the statistics of all candidates by concatenating, per candidate, the Precis
-   * statistics and the SourceBLEU statistics.
-   *
-   * @param cand_strings the candidate sentences
-   * @param cand_indices for each candidate, the index of the sentence it belongs to
-   * @return one row of {@code suffStatsCount} statistics per candidate, or {@code null} if the
-   *         two arrays differ in length
-   */
-  public int[][] suffStats(String[] cand_strings, int[] cand_indices) {
-    int candCount = cand_strings.length;
-    if (cand_indices.length != candCount) {
-      System.out.println("Array lengths mismatch in suffStats(String[],int[]); returning null.");
-      return null;
-    }
-
-    int[][] stats = new int[candCount][suffStatsCount];
-
-    int[][] precis_stats = myPrecis.suffStats(cand_strings, cand_indices);
-    int[][] source_bleu_stats = mySourceBLEU.suffStats(cand_strings, cand_indices);
-
-    for (int d = 0; d < candCount; ++d) {
-      System.arraycopy(precis_stats[d], 0, stats[d], 0, precisCount);
-      System.arraycopy(source_bleu_stats[d], 0, stats[d], precisCount, sourceBleuCount);
-    }
-    return stats;
-  }
-
-  /**
-   * Writes the statistics file by letting Precis and SourceBLEU write their own files
-   * ({@code outputFileName + ".PRECIS"} and {@code outputFileName + ".SRC_BLEU"}), joining them
-   * line by line into {@code outputFileName}, and deleting the two intermediate files.
-   *
-   * <p>On an {@link IOException} the message is printed to standard error and the JVM is
-   * terminated.
-   */
-  public void createSuffStatsFile(String cand_strings_fileName, String cand_indices_fileName,
-      String outputFileName, int maxBatchSize) {
-    String precisFileName = outputFileName + ".PRECIS";
-    String sourceBleuFileName = outputFileName + ".SRC_BLEU";
-
-    try {
-      myPrecis.createSuffStatsFile(cand_strings_fileName, cand_indices_fileName, precisFileName,
-          maxBatchSize);
-      mySourceBLEU.createSuffStatsFile(cand_strings_fileName, cand_indices_fileName,
-          sourceBleuFileName, maxBatchSize);
-
-      // all streams are closed again before the intermediate files are deleted
-      combineStatsFiles(precisFileName, sourceBleuFileName, outputFileName);
-
-      File fd;
-      fd = new File(precisFileName);
-      if (fd.exists()) fd.delete();
-      fd = new File(sourceBleuFileName);
-      if (fd.exists()) fd.delete();
-    } catch (IOException e) {
-      System.err.println("IOException: " + e.getMessage());
-      System.exit(99902);
-    }
-  }
-
-  // Joins the two statistics files line by line (separated by a space) into outputFileName.
-  // Every stream that was opened is closed again, also when reading or writing fails.
-  private static void combineStatsFiles(String precisFileName, String sourceBleuFileName,
-      String outputFileName) throws IOException {
-    // written with the same encoding the two inputs are read with
-    PrintWriter outFile = new PrintWriter(outputFileName, "utf8");
-    try {
-      BufferedReader inFile_Precis =
-          new BufferedReader(new InputStreamReader(new FileInputStream(precisFileName), "utf8"));
-      try {
-        BufferedReader inFile_SourceBLEU =
-            new BufferedReader(new InputStreamReader(new FileInputStream(sourceBleuFileName),
-                "utf8"));
-        try {
-          String line_Precis = inFile_Precis.readLine();
-          String line_SourceBLEU = inFile_SourceBLEU.readLine();
-
-          // combine the two files into one
-          while (line_Precis != null) {
-            outFile.println(line_Precis + " " + line_SourceBLEU);
-            line_Precis = inFile_Precis.readLine();
-            line_SourceBLEU = inFile_SourceBLEU.readLine();
-          }
-        } finally {
-          inFile_SourceBLEU.close();
-        }
-      } finally {
-        inFile_Precis.close();
-      }
-    } finally {
-      outFile.close();
-    }
-  }
-
-  /**
-   * Scores the combined statistics as {@code Precis - bleuWeight * SourceBLEU}. Prints an error
-   * and terminates the JVM with status 1 if {@code stats} does not have {@code suffStatsCount}
-   * entries.
-   */
-  public double score(int[] stats) {
-    if (stats.length != suffStatsCount) {
-      System.out.println("Mismatch between stats.length and suffStatsCount (" + stats.length
-          + " vs. " + suffStatsCount + ") in PrecisMinusSourceBLEU.score(int[])");
-      System.exit(1);
-    }
-
-    double sc_T = myPrecis.score(precisPartOf(stats));
-    double sc_B = mySourceBLEU.score(sourceBleuPartOf(stats));
-
-    return sc_T - (bleuWeight * sc_B);
-  }
-
-  /** Prints the detailed scores of both sub-metrics followed by the combined score. */
-  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner) {
-    int[] stats_Precis = precisPartOf(stats);
-    int[] stats_SourceBLEU = sourceBleuPartOf(stats);
-
-    System.out.println("---PRECIS---");
-    myPrecis.printDetailedScore_fromStats(stats_Precis, oneLiner);
-    System.out.println("---SRC_BLEU---");
-    mySourceBLEU.printDetailedScore_fromStats(stats_SourceBLEU, oneLiner);
-    System.out.println("---------");
-    System.out.println("  => " + metricName + " = " + f4.format(score(stats)));
-  }
-
-  // Copy of the leading precisCount statistics, i.e. the ones that belong to Precis.
-  private int[] precisPartOf(int[] stats) {
-    int[] part = new int[precisCount];
-    System.arraycopy(stats, 0, part, 0, precisCount);
-    return part;
-  }
-
-  // Copy of the sourceBleuCount statistics that follow the Precis ones.
-  private int[] sourceBleuPartOf(int[] stats) {
-    int[] part = new int[sourceBleuCount];
-    System.arraycopy(stats, precisCount, part, 0, sourceBleuCount);
-    return part;
-  }
-
-}