Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/23 22:17:57 UTC

[40/50] [abbrv] incubator-joshua git commit: Merge branch 'master' into 7

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
index f374279,0000000..10efdc6
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
+++ b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
@@@ -1,411 -1,0 +1,412 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.corpus.syntax;
 +
 +import java.io.Externalizable;
 +import java.io.IOException;
 +import java.io.ObjectInput;
 +import java.io.ObjectOutput;
 +import java.util.ArrayList;
 +import java.util.Collection;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.Map;
 +import java.util.Set;
 +import java.util.Stack;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.util.io.LineReader;
 +
 +public class ArraySyntaxTree implements SyntaxTree, Externalizable {
 +
 +  /**
 +   * Note that index stores the indices of lattice node positions, i.e., the last element of
 +   * index is the terminal node, pointing to lattice.size().
 +   */
 +  private ArrayList<Integer> forwardIndex;
 +  private ArrayList<Integer> forwardLattice;
 +  private ArrayList<Integer> backwardIndex;
 +  private ArrayList<Integer> backwardLattice;
 +
 +  private ArrayList<Integer> terminals;
 +
-   private boolean useBackwardLattice = true;
++  private final boolean useBackwardLattice = true;
 +
 +  private static final int MAX_CONCATENATIONS = 3;
 +  private static final int MAX_LABELS = 100;
 +
 +  public ArraySyntaxTree() {
 +    forwardIndex = null;
 +    forwardLattice = null;
 +    backwardIndex = null;
 +    backwardLattice = null;
 +
 +    terminals = null;
 +  }
 +
 +
 +  public ArraySyntaxTree(String parsed_line) {
 +    initialize();
 +    appendFromPennFormat(parsed_line);
 +  }
 +
 +
 +  /**
 +   * Returns a collection of single-non-terminal labels that exactly cover the specified span in the
 +   * lattice.
 +   */
 +  public Collection<Integer> getConstituentLabels(int from, int to) {
-     Collection<Integer> labels = new HashSet<Integer>();
++    Collection<Integer> labels = new HashSet<>();
 +    int span_length = to - from;
 +    for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
 +      int current_span = forwardLattice.get(i + 1);
 +      if (current_span == span_length)
 +        labels.add(forwardLattice.get(i));
 +      else if (current_span < span_length) break;
 +    }
 +    return labels;
 +  }
 +
 +
 +  public int getOneConstituent(int from, int to) {
 +    int spanLength = to - from;
-     Stack<Integer> stack = new Stack<Integer>();
++    Stack<Integer> stack = new Stack<>();
 +
 +    for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
 +      int currentSpan = forwardLattice.get(i + 1);
 +      if (currentSpan == spanLength) {
 +        return forwardLattice.get(i);
 +      } else if (currentSpan < spanLength) break;
 +    }
 +    if (stack.isEmpty()) return 0;
 +    StringBuilder sb = new StringBuilder();
 +    while (!stack.isEmpty()) {
 +      String w = Vocabulary.word(stack.pop());
 +      if (sb.length() != 0) sb.append(":");
 +      sb.append(w);
 +    }
 +    String label = sb.toString();
 +    return Vocabulary.id(adjustMarkup(label));
 +  }
 +
 +
 +  public int getOneSingleConcatenation(int from, int to) {
 +    for (int midpt = from + 1; midpt < to; midpt++) {
 +      int x = getOneConstituent(from, midpt);
 +      if (x == 0) continue;
 +      int y = getOneConstituent(midpt, to);
 +      if (y == 0) continue;
 +      String label = Vocabulary.word(x) + "+" + Vocabulary.word(y);
 +      return Vocabulary.id(adjustMarkup(label));
 +    }
 +    return 0;
 +  }
 +
 +
 +  public int getOneDoubleConcatenation(int from, int to) {
 +    for (int a = from + 1; a < to - 1; a++) {
 +      for (int b = a + 1; b < to; b++) {
 +        int x = getOneConstituent(from, a);
 +        if (x == 0) continue;
 +        int y = getOneConstituent(a, b);
 +        if (y == 0) continue;
 +        int z = getOneConstituent(b, to);
 +        if (z == 0) continue;
 +        String label = Vocabulary.word(x) + "+" + Vocabulary.word(y) + "+" + Vocabulary.word(z);
 +        return Vocabulary.id(adjustMarkup(label));
 +      }
 +    }
 +    return 0;
 +  }
 +
 +
 +  public int getOneRightSideCCG(int from, int to) {
 +    for (int end = to + 1; end <= forwardLattice.size(); end++) {
 +      int x = getOneConstituent(from, end);
 +      if (x == 0) continue;
 +      int y = getOneConstituent(to, end);
 +      if (y == 0) continue;
 +      String label = Vocabulary.word(x) + "/" + Vocabulary.word(y);
 +      return Vocabulary.id(adjustMarkup(label));
 +    }
 +    return 0;
 +  }
 +
 +
 +  public int getOneLeftSideCCG(int from, int to) {
 +    for (int start = from - 1; start >= 0; start--) {
 +      int x = getOneConstituent(start, to);
 +      if (x == 0) continue;
 +      int y = getOneConstituent(start, from);
 +      if (y == 0) continue;
 +      String label = Vocabulary.word(y) + "\\" + Vocabulary.word(x);
 +      return Vocabulary.id(adjustMarkup(label));
 +    }
 +    return 0;
 +  }
 +
 +
 +  /**
 +   * Returns a collection of concatenated non-terminal labels that exactly cover the specified span
 +   * in the lattice. The number of non-terminals concatenated is limited by MAX_CONCATENATIONS and
 +   * the total number of labels returned is bounded by MAX_LABELS.
 +   */
 +  public Collection<Integer> getConcatenatedLabels(int from, int to) {
-     Collection<Integer> labels = new HashSet<Integer>();
++    Collection<Integer> labels = new HashSet<>();
 +
 +    int span_length = to - from;
-     Stack<Integer> nt_stack = new Stack<Integer>();
-     Stack<Integer> pos_stack = new Stack<Integer>();
-     Stack<Integer> depth_stack = new Stack<Integer>();
++    Stack<Integer> nt_stack = new Stack<>();
++    Stack<Integer> pos_stack = new Stack<>();
++    Stack<Integer> depth_stack = new Stack<>();
 +
 +    // seed stacks (reverse order to save on iterations, longer spans)
 +    for (int i = forwardIndex.get(from + 1) - 2; i >= forwardIndex.get(from); i -= 2) {
 +      int current_span = forwardLattice.get(i + 1);
 +      if (current_span < span_length) {
 +        nt_stack.push(forwardLattice.get(i));
 +        pos_stack.push(from + current_span);
 +        depth_stack.push(1);
 +      } else if (current_span >= span_length) break;
 +    }
 +
 +    while (!nt_stack.isEmpty() && labels.size() < MAX_LABELS) {
 +      int nt = nt_stack.pop();
 +      int pos = pos_stack.pop();
 +      int depth = depth_stack.pop();
 +
 +      // maximum depth reached without filling span
 +      if (depth == MAX_CONCATENATIONS) continue;
 +
 +      int remaining_span = to - pos;
 +      for (int i = forwardIndex.get(pos + 1) - 2; i >= forwardIndex.get(pos); i -= 2) {
 +        int current_span = forwardLattice.get(i + 1);
 +        if (current_span > remaining_span) break;
 +
 +        // create and look up concatenated label
 +        int concatenated_nt =
 +            Vocabulary.id(adjustMarkup(Vocabulary.word(nt) + "+"
 +                + Vocabulary.word(forwardLattice.get(i))));
 +        if (current_span < remaining_span) {
 +          nt_stack.push(concatenated_nt);
 +          pos_stack.push(pos + current_span);
 +          depth_stack.push(depth + 1);
 +        } else if (current_span == remaining_span) {
 +          labels.add(concatenated_nt);
 +        }
 +      }
 +    }
 +
 +    return labels;
 +  }
 +
 +  // TODO: could precompute all of this in a top-down fashion.
 +  public Collection<Integer> getCcgLabels(int from, int to) {
-     Collection<Integer> labels = new HashSet<Integer>();
++    Collection<Integer> labels = new HashSet<>();
 +
 +    int span_length = to - from;
 +    // TODO: range checks on the to and from
 +
 +    boolean is_prefix = (forwardLattice.get(forwardIndex.get(from) + 1) > span_length);
 +    if (is_prefix) {
-       Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
++      Map<Integer, Set<Integer>> main_constituents = new HashMap<>();
 +      // find missing to the right
 +      for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
 +        int current_span = forwardLattice.get(i + 1);
 +        if (current_span <= span_length)
 +          break;
 +        else {
 +          int end_pos = forwardLattice.get(i + 1) + from;
 +          Set<Integer> nts = main_constituents.get(end_pos);
-           if (nts == null) main_constituents.put(end_pos, new HashSet<Integer>());
++          if (nts == null) main_constituents.put(end_pos, new HashSet<>());
 +          main_constituents.get(end_pos).add(forwardLattice.get(i));
 +        }
 +      }
 +      for (int i = forwardIndex.get(to); i < forwardIndex.get(to + 1); i += 2) {
 +        Set<Integer> main_set = main_constituents.get(to + forwardLattice.get(i + 1));
 +        if (main_set != null) {
 +          for (int main : main_set)
 +            labels.add(Vocabulary.id(adjustMarkup(Vocabulary.word(main) + "/"
 +                + Vocabulary.word(forwardLattice.get(i)))));
 +        }
 +      }
 +    }
 +
 +    if (!is_prefix) {
 +      if (useBackwardLattice) {
 +        // check if there is any possible higher-level constituent overlapping
 +        int to_end =
 +            (to == backwardIndex.size() - 1) ? backwardLattice.size() : backwardIndex.get(to + 1);
 +        // check longest span ending in to..
 +        if (backwardLattice.get(to_end - 1) <= span_length) return labels;
 +
-         Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
++        Map<Integer, Set<Integer>> main_constituents = new HashMap<>();
 +        // find missing to the left
 +        for (int i = to_end - 2; i >= backwardIndex.get(to); i -= 2) {
 +          int current_span = backwardLattice.get(i + 1);
 +          if (current_span <= span_length)
 +            break;
 +          else {
 +            int start_pos = to - backwardLattice.get(i + 1);
 +            Set<Integer> nts = main_constituents.get(start_pos);
-             if (nts == null) main_constituents.put(start_pos, new HashSet<Integer>());
++            if (nts == null) main_constituents.put(start_pos, new HashSet<>());
 +            main_constituents.get(start_pos).add(backwardLattice.get(i));
 +          }
 +        }
 +        for (int i = backwardIndex.get(from); i < backwardIndex.get(from + 1); i += 2) {
 +          Set<Integer> main_set = main_constituents.get(from - backwardLattice.get(i + 1));
 +          if (main_set != null) {
 +            for (int main : main_set)
 +              labels.add(Vocabulary.id(adjustMarkup(Vocabulary.word(main) + "\\"
 +                  + Vocabulary.word(backwardLattice.get(i)))));
 +          }
 +        }
 +      } else {
 +        // TODO: bothersome no-backwards-arrays method.
 +      }
 +    }
 +    return labels;
 +  }
 +
 +  @Override
 +  public int[] getTerminals() {
 +    return getTerminals(0, terminals.size());
 +  }
 +
 +  @Override
 +  public int[] getTerminals(int from, int to) {
 +    int[] span = new int[to - from];
 +    for (int i = from; i < to; i++)
 +      span[i - from] = terminals.get(i);
 +    return span;
 +  }
 +
 +  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
 +    // TODO Auto-generated method stub
 +  }
 +
 +  public void writeExternal(ObjectOutput out) throws IOException {
 +    // TODO Auto-generated method stub
 +  }
 +
 +  /**
 +   * Reads a file in Penn Treebank format, one parse per line.
 +   * @param file_name the string path of the Penn Treebank file
 +   * @throws IOException if the file cannot be opened or read
 +   */
 +  public void readExternalText(String file_name) throws IOException {
 +    LineReader reader = new LineReader(file_name);
 +    initialize();
 +    for (String line : reader) {
 +      if (line.trim().equals("")) continue;
 +      appendFromPennFormat(line);
 +    }
 +  }
 +
 +  public void writeExternalText(String file_name) throws IOException {
 +    // TODO Auto-generated method stub
 +  }
 +
 +  @Override
 +  public String toString() {
 +    StringBuilder sb = new StringBuilder();
 +    for (int i = 0; i < forwardIndex.size(); i++)
-       sb.append("FI[" + i + "] =\t" + forwardIndex.get(i) + "\n");
++      sb.append("FI[").append(i).append("] =\t").append(forwardIndex.get(i)).append("\n");
 +    sb.append("\n");
 +    for (int i = 0; i < forwardLattice.size(); i += 2)
-       sb.append("F[" + i + "] =\t" + Vocabulary.word(forwardLattice.get(i)) + " , "
-           + forwardLattice.get(i + 1) + "\n");
++      sb.append("F[").append(i).append("] =\t").append(Vocabulary.word(forwardLattice.get(i)))
++          .append(" , ").append(forwardLattice.get(i + 1)).append("\n");
 +
 +    sb.append("\n");
 +    for (int i = 0; i < terminals.size(); i += 1)
-       sb.append("T[" + i + "] =\t" + Vocabulary.word(terminals.get(i)) + " , 1 \n");
++      sb.append("T[").append(i).append("] =\t").append(Vocabulary.word(terminals.get(i)))
++          .append(" , 1 \n");
 +
 +    if (this.useBackwardLattice) {
 +      sb.append("\n");
 +      for (int i = 0; i < backwardIndex.size(); i++)
-         sb.append("BI[" + i + "] =\t" + backwardIndex.get(i) + "\n");
++        sb.append("BI[").append(i).append("] =\t").append(backwardIndex.get(i)).append("\n");
 +      sb.append("\n");
 +      for (int i = 0; i < backwardLattice.size(); i += 2)
-         sb.append("B[" + i + "] =\t" + Vocabulary.word(backwardLattice.get(i)) + " , "
-             + backwardLattice.get(i + 1) + "\n");
++        sb.append("B[").append(i).append("] =\t").append(Vocabulary.word(backwardLattice.get(i)))
++            .append(" , ").append(backwardLattice.get(i + 1)).append("\n");
 +    }
 +    return sb.toString();
 +  }
 +
 +
 +  private void initialize() {
-     forwardIndex = new ArrayList<Integer>();
++    forwardIndex = new ArrayList<>();
 +    forwardIndex.add(0);
-     forwardLattice = new ArrayList<Integer>();
++    forwardLattice = new ArrayList<>();
 +    if (this.useBackwardLattice) {
-       backwardIndex = new ArrayList<Integer>();
++      backwardIndex = new ArrayList<>();
 +      backwardIndex.add(0);
-       backwardLattice = new ArrayList<Integer>();
++      backwardLattice = new ArrayList<>();
 +    }
 +
-     terminals = new ArrayList<Integer>();
++    terminals = new ArrayList<>();
 +  }
 +
 +
 +  // TODO: could make this way more efficient
 +  private void appendFromPennFormat(String line) {
 +    String[] tokens = line.replaceAll("\\(", " ( ").replaceAll("\\)", " ) ").trim().split("\\s+");
 +
 +    boolean next_nt = false;
 +    int current_id = 0;
-     Stack<Integer> stack = new Stack<Integer>();
++    Stack<Integer> stack = new Stack<>();
 +
 +    for (String token : tokens) {
 +      if ("(".equals(token)) {
 +        next_nt = true;
 +        continue;
 +      }
 +      if (")".equals(token)) {
 +        int closing_pos = stack.pop();
 +        forwardLattice.set(closing_pos, forwardIndex.size() - forwardLattice.get(closing_pos));
 +        if (this.useBackwardLattice) {
 +          backwardLattice.add(forwardLattice.get(closing_pos - 1));
 +          backwardLattice.add(forwardLattice.get(closing_pos));
 +        }
 +        continue;
 +      }
 +      if (next_nt) {
 +        // get NT id
 +        current_id = Vocabulary.id(adjustMarkup(token));
 +        // add into lattice
 +        forwardLattice.add(current_id);
 +        // push NT span field onto stack (added hereafter, we're just saving the "- 1")
 +        stack.push(forwardLattice.size());
 +        // add NT span field
 +        forwardLattice.add(forwardIndex.size());
 +      } else {
 +        current_id = Vocabulary.id(token);
 +        terminals.add(current_id);
 +
 +        forwardIndex.add(forwardLattice.size());
 +        if (this.useBackwardLattice) backwardIndex.add(backwardLattice.size());
 +      }
 +      next_nt = false;
 +    }
 +  }
 +
 +  private String adjustMarkup(String nt) {
 +    return "[" + nt.replaceAll("[\\[\\]]", "") + "]";
 +  }
 +}
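
A minimal usage sketch for the ArraySyntaxTree class above, assuming only the
constructor and accessors shown in this diff; the Penn-format sentence is
invented for illustration:

    ArraySyntaxTree tree =
        new ArraySyntaxTree("(S (NP (DT the) (NN dog)) (VP (VBD barked)))");
    // Non-terminal labels exactly covering the first two terminals (here the NP).
    for (int label : tree.getConstituentLabels(0, 2))
      System.out.println(Vocabulary.word(label));
    // Concatenated labels (e.g. "[DT+NN]"), bounded by MAX_CONCATENATIONS and MAX_LABELS.
    Collection<Integer> concatenated = tree.getConcatenatedLabels(0, 2);
    // Integer symbol ids of all terminals, via the SyntaxTree interface.
    int[] terminals = tree.getTerminals();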

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
index 6bb4c0b,0000000..f96cd2c
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
+++ b/joshua-core/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
@@@ -1,34 -1,0 +1,34 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.corpus.syntax;
 +
 +import java.util.Collection;
 +
 +public interface SyntaxTree {
 +
-   public Collection<Integer> getConstituentLabels(int from, int to);
++  Collection<Integer> getConstituentLabels(int from, int to);
 +
-   public Collection<Integer> getConcatenatedLabels(int from, int to);
++  Collection<Integer> getConcatenatedLabels(int from, int to);
 +
-   public Collection<Integer> getCcgLabels(int from, int to);
++  Collection<Integer> getCcgLabels(int from, int to);
 +
-   public int[] getTerminals();
++  int[] getTerminals();
 +
-   public int[] getTerminals(int from, int to);
++  int[] getTerminals(int from, int to);
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
index 5af6d11,0000000..26ed674
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ArgsParser.java
@@@ -1,118 -1,0 +1,116 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import java.io.IOException;
 +import java.nio.charset.Charset;
 +import java.nio.file.Files;
 +import java.nio.file.Paths;
 +
 +import org.apache.joshua.util.io.LineReader;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * @author orluke
 + * 
 + */
 +public class ArgsParser {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(ArgsParser.class);
 +
 +  private String configFile = null;
 +
 +  /**
 +   * Parses the arguments passed on the command line when the JoshuaDecoder application was
 +   * invoked.
 +   * 
 +   * @param args string array of input arguments
 +   * @param config the {@link org.apache.joshua.decoder.JoshuaConfiguration}
 +   * @throws IOException if there is an error with the input arguments
 +   */
 +  public ArgsParser(String[] args, JoshuaConfiguration config) throws IOException {
 +
 +    /*
 +     * Look for a verbose flag, -v.
 +     * 
 +     * Look for an argument to the "-config" flag to find the config file, if any. 
 +     */
 +    if (args.length >= 1) {
 +      // Search for a verbose flag
 +      for (int i = 0; i < args.length; i++) {
 +        if (args[i].equals("-v")) {
 +          Decoder.VERBOSE = Integer.parseInt(args[i + 1].trim());
 +          config.setVerbosity(Decoder.VERBOSE);
 +        }
 +      
 +        if (args[i].equals("-version")) {
 +          LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));
 +          reader.readLine();
 +          String version = reader.readLine().split("\\s+")[2];
 +          System.out.println(String.format("The Apache Joshua machine translator, version %s", version));
 +          System.out.println("joshua.incubator.apache.org");
 +          System.exit(0);
 +
 +        } else if (args[i].equals("-license")) {
 +          try {
-             for (String line: Files.readAllLines(Paths.get(String.format("%s/../LICENSE", 
-                 JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation().getPath())), 
-                 Charset.defaultCharset())) {
-               System.out.println(line);
-             }
++            Files.readAllLines(Paths.get(String.format("%s/../LICENSE",
++                JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation()
++                    .getPath())), Charset.defaultCharset()).forEach(System.out::println);
 +          } catch (IOException e) {
 +            throw new RuntimeException("FATAL: missing license file!", e);
 +          }
 +          System.exit(0);
 +        }
 +      }
 +
 +      // Search for the configuration file from the end (so as to take the last one)
 +      for (int i = args.length-1; i >= 0; i--) {
 +        if (args[i].equals("-c") || args[i].equals("-config")) {
 +
 +          setConfigFile(args[i + 1].trim());
 +          try {
 +            LOG.info("Parameters read from configuration file: {}", getConfigFile());
 +            config.readConfigFile(getConfigFile());
 +          } catch (IOException e) {
 +            throw new RuntimeException(e);
 +          }
 +          break;
 +        }
 +      }
 +
 +      // Now process all the command-line args
 +      config.processCommandLineOptions(args);
 +    }
 +  }
 +
 +  /**
 +   * @return the configFile
 +   */
 +  public String getConfigFile() {
 +    return configFile;
 +  }
 +
 +  /**
 +   * @param configFile the configFile to set
 +   */
 +  public void setConfigFile(String configFile) {
 +    this.configFile = configFile;
 +  }
 +}
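
A sketch of how the parser above might be driven; the flags follow the -v and
-c/-config handling in the constructor, and joshua.config is a placeholder
path, not a file from this commit:

    JoshuaConfiguration config = new JoshuaConfiguration();
    String[] args = { "-v", "1", "-c", "joshua.config" };
    // The constructor sets verbosity, reads the config file, then applies the
    // remaining command-line options; it throws IOException on bad input.
    ArgsParser cli = new ArgsParser(args, config);
    System.out.println("Parameters read from: " + cli.getConfigFile());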

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
index a6e02b2,0000000..6eb45ae
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/BLEU.java
@@@ -1,562 -1,0 +1,562 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Map.Entry;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HyperEdge;
 +import org.apache.joshua.util.Ngram;
 +import org.apache.joshua.util.Regex;
 +
 +/**
 + * This class implements sentence-level BLEU with smoothing.
 + * 
 + * @author Zhifei Li, zhifei.work@gmail.com
 + */
 +public class BLEU {
 +  // do_ngram_clip: consider global n-gram clip
 +
 +  public static float computeSentenceBleu(String[] refSents, String hypSent) {
 +    return computeSentenceBleu(refSents, hypSent, true, 4, false);
 +  }
 +
 +  // ====================multiple references
 +  /**
 +   * Computes smoothed sentence-level BLEU against multiple references.
 +   * 
 +   * @param refSents the reference sentences
 +   * @param hypSent the hypothesis sentence
 +   * @param doNgramClip Should usually be true
 +   * @param bleuOrder Should usually be 4
 +   * @param useShortestRef Probably use false
 +   * @return the smoothed sentence-level BLEU score
 +   */
 +  public static float computeSentenceBleu(String[] refSents, String hypSent, boolean doNgramClip,
 +      int bleuOrder, boolean useShortestRef) {
 +    // === ref tbl
 +    HashMap<String, Integer> maxRefCountTbl = constructMaxRefCountTable(refSents, bleuOrder);
 +
 +    // == ref len
 +    int[] refLens = new int[refSents.length];
 +    for (int i = 0; i < refSents.length; i++) {
 +      String[] refWords = Regex.spaces.split(refSents[i]);
 +      refLens[i] = refWords.length;
 +    }
 +
 +    float effectiveRefLen = computeEffectiveLen(refLens, useShortestRef);
 +
 +    // === hyp tbl
 +    String[] hypWrds = Regex.spaces.split(hypSent);
-     HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
++    HashMap<String, Integer> hypNgramTbl = new HashMap<>();
 +    Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
 +    return computeSentenceBleu(effectiveRefLen, maxRefCountTbl, hypWrds.length, hypNgramTbl,
 +        doNgramClip, bleuOrder);
 +  }
 +
 +  public static float computeEffectiveLen(int[] refLens, boolean useShortestRef) {
 +    if (useShortestRef) {
 +      int res = Integer.MAX_VALUE;
-       for (int i = 0; i < refLens.length; i++)
-         if (refLens[i] < res)
-           res = refLens[i];
++      for (int refLen : refLens)
++        if (refLen < res)
++          res = refLen;
 +      return res;
 +    } else {// default is average length
 +      float res = 0;
-       for (int i = 0; i < refLens.length; i++)
-         res += refLens[i];
++      for (int refLen : refLens)
++        res += refLen;
 +      return res * 1.0f / refLens.length;
 +    }
 +  }
 +
 +  /**
 +   * Words in the ngrams use integer symbol IDs.
 +   * @param refSents the reference sentences
 +   * @param bleuOrder the maximum ngram order
 +   * @return a map from each ngram to its maximum count over the references
 +   * */
 +  public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
 +
-     List<HashMap<String, Integer>> listRefNgramTbl = new ArrayList<HashMap<String, Integer>>();
-     for (int i = 0; i < refSents.length; i++) {
++    List<HashMap<String, Integer>> listRefNgramTbl = new ArrayList<>();
++    for (String refSent : refSents) {
 +      // if(refSents[i]==null){System.out.println("null ref sent"); System.exit(1);}
 +      // String[] refWords = refSents[i].split("\\s+");
-       String[] refWords = Regex.spaces.split(refSents[i]);
++      String[] refWords = Regex.spaces.split(refSent);
 +
-       HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
++      HashMap<String, Integer> refNgramTbl = new HashMap<>();
 +      Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWords);
 +      listRefNgramTbl.add(refNgramTbl);
 +    }
 +
 +    return computeMaxRefCountTbl(listRefNgramTbl);
 +  }
 +
 +  /**
 +   * Computes max_ref_count for each ngram in the reference sentences.
 +   * @param listRefNgramTbl ngram count tables, one per reference sentence
 +   * @return a map from each ngram to its maximum count over the references
 +   * */
 +  public static HashMap<String, Integer> computeMaxRefCountTbl(
 +      List<HashMap<String, Integer>> listRefNgramTbl) {
 +
-     HashMap<String, Integer> merged = new HashMap<String, Integer>();
++    HashMap<String, Integer> merged = new HashMap<>();
 +
 +    // == get merged key set
 +    for (HashMap<String, Integer> tbl : listRefNgramTbl) {
 +      for (String ngram : tbl.keySet()) {
 +        merged.put(ngram, 0);
 +      }
 +    }
 +
 +    // == get max ref count
 +    for (String ngram : merged.keySet()) {
 +      int max = 0;
 +      for (HashMap<String, Integer> tbl : listRefNgramTbl) {
 +        Integer val = tbl.get(ngram);
 +        if (val != null && val > max)
 +          max = val;
 +      }
 +
 +      merged.put(ngram, max);
 +    }
 +    return merged;
 +  }
 +
 +  public static float computeSentenceBleu(float effectiveRefLen,
 +      HashMap<String, Integer> maxRefCountTbl, int hypLen, HashMap<String, Integer> hypNgramTbl,
 +      boolean doNgramClip, int bleuOrder) {
 +
 +    float resBleu = 0.0f;
 +
 +    int[] numNgramMatch = new int[bleuOrder];
 +    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {// each ngram in hyp
 +      String ngram = entry.getKey();
 +      if (maxRefCountTbl.containsKey(ngram)) {
 +        int hypNgramCount = entry.getValue();
 +
 +        int effectiveNumMatch = hypNgramCount;
 +
 +        if (doNgramClip) {// min{hypNgramCount, maxRefCount}
 +          int maxRefCount = maxRefCountTbl.get(ngram);
 +          effectiveNumMatch = (int) Support.findMin(hypNgramCount, maxRefCount); // ngram clip;
 +        }
 +
 +        numNgramMatch[Regex.spaces.split(ngram).length - 1] += effectiveNumMatch;
 +      }
 +    }
 +
 +    resBleu = computeBleu(hypLen, effectiveRefLen, numNgramMatch, bleuOrder);
 +    // System.out.println("hyp_len: " + hyp_sent.length + "; ref_len:" + ref_sent.length +
 +    // "; bleu: " + res_bleu +" num_ngram_matches: " + num_ngram_match[0] + " " +num_ngram_match[1]+
 +    // " " + num_ngram_match[2] + " " +num_ngram_match[3]);
 +    // System.out.println("Blue is " + res_bleu);
 +    return resBleu;
 +  }
 +
 +  // ==============================multiple references end
 +
 +  public static float computeSentenceBleu(String refSent, String hypSent, boolean doNgramClip,
 +      int bleuOrder) {
 +    String[] refWrds = Regex.spaces.split(refSent);
 +    String[] hypWrds = Regex.spaces.split(hypSent);
-     HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
++    HashMap<String, Integer> refNgramTbl = new HashMap<>();
 +    Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWrds);
-     HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
++    HashMap<String, Integer> hypNgramTbl = new HashMap<>();
 +    Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
 +    return computeSentenceBleu(refWrds.length, refNgramTbl, hypWrds.length, hypNgramTbl,
 +        doNgramClip, bleuOrder);
 +  }
 +
 +  public static float computeSentenceBleu(int refLen, HashMap<String, Integer> refNgramTbl,
 +      int hypLen, HashMap<String, Integer> hypNgramTbl, boolean doNgramClip, int bleuOrder) {
 +    float resBleu = 0;
 +
 +    int[] numNgramMatch = new int[bleuOrder];
 +    for (Map.Entry<String, Integer> entry : hypNgramTbl.entrySet()) {
 +      String ngram = entry.getKey();
 +      if (refNgramTbl.containsKey(ngram)) {
 +        if (doNgramClip) {
 +          numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
 +              refNgramTbl.get(ngram), entry.getValue()); // ngram clip
 +        } else {
 +          numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without ngram count clipping
 +        }
 +      }
 +    }
 +    resBleu = computeBleu(hypLen, refLen, numNgramMatch, bleuOrder);
 +    // System.out.println("hyp_len: " + hyp_sent.length + "; ref_len:" + ref_sent.length +
 +    // "; bleu: " + res_bleu +" num_ngram_matches: " + num_ngram_match[0] + " " +num_ngram_match[1]+
 +    // " " + num_ngram_match[2] + " " +num_ngram_match[3]);
 +    // System.out.println("Blue is " + res_bleu);
 +    return resBleu;
 +  }
 +
 +  // sentence-level BLEU: BLEU = bp * prec, where prec = exp(sum_n (1/bleuOrder) * log(prec_n))
 +  public static float computeBleu(int hypLen, float refLen, int[] numNgramMatch, int bleuOrder) {
 +    if (hypLen <= 0 || refLen <= 0) {
 +      throw new RuntimeException("error: ref or hyp is zero len");
 +    }
 +    float res = 0;
 +    float wt = 1.0f / bleuOrder;
 +    float prec = 0;
 +    float smooth_factor = 1.0f;
 +    for (int t = 0; t < bleuOrder && t < hypLen; t++) {
 +      if (numNgramMatch[t] > 0) {
 +        prec += wt * Math.log(numNgramMatch[t] * 1.0 / (hypLen - t));
 +      } else {
 +        smooth_factor *= 0.5;// TODO
 +        prec += wt * Math.log(smooth_factor / (hypLen - t));
 +      }
 +    }
 +    float bp = (hypLen >= refLen) ? 1.0f : (float) Math.exp(1 - refLen / hypLen);
 +    res = bp * (float) Math.exp(prec);
 +    // System.out.println("hyp_len: " + hyp_len + "; ref_len:" + ref_len + "prec: " + Math.exp(prec)
 +    // + "; bp: " + bp + "; bleu: " + res);
 +    return res;
 +  }
 +
 +  public static HashMap<String, Integer> constructNgramTable(String sentence, int bleuOrder) {
-     HashMap<String, Integer> ngramTable = new HashMap<String, Integer>();
++    HashMap<String, Integer> ngramTable = new HashMap<>();
 +    String[] refWrds = Regex.spaces.split(sentence);
 +    Ngram.getNgrams(ngramTable, 1, bleuOrder, refWrds);
 +    return ngramTable;
 +  }
 +
 +  // ================================ Google linear corpus gain
 +  // ============================================
 +  public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, String[] refSents,
 +      String hypSent) {
 +    int bleuOrder = 4;
 +    int hypLength = Regex.spaces.split(hypSent).length;
 +    HashMap<String, Integer> referenceNgramTable = BLEU.constructMaxRefCountTable(refSents,
 +        bleuOrder);
 +    HashMap<String, Integer> hypNgramTable = BLEU.constructNgramTable(hypSent, bleuOrder);
 +    return computeLinearCorpusGain(linearCorpusGainThetas, hypLength, hypNgramTable,
 +        referenceNgramTable);
 +  }
 +
 +  /**
 +   * Speed consideration: assumes hypNgramTable is smaller than referenceNgramTable.
 +   * @param linearCorpusGainThetas the linear corpus gain weights, indexed by ngram order
 +   * @param hypLength the hypothesis length in words
 +   * @param hypNgramTable ngram counts of the hypothesis
 +   * @param referenceNgramTable ngram counts of the references
 +   * @return the linear corpus gain
 +   */
 +  public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, int hypLength,
 +      Map<String, Integer> hypNgramTable, Map<String, Integer> referenceNgramTable) {
 +    float res = 0;
 +    res += linearCorpusGainThetas[0] * hypLength;
 +    for (Entry<String, Integer> entry : hypNgramTable.entrySet()) {
 +      String ngram = entry.getKey();
 +      if (referenceNgramTable.containsKey(ngram)) {// delta function
 +        int ngramOrder = Regex.spaces.split(ngram).length;
 +        res += entry.getValue() * linearCorpusGainThetas[ngramOrder];
 +      }
 +    }
 +    return res;
 +  }
 +
 +  /* Convenience function */
 +  public static int[] computeNgramMatches(String[] refSents, String hypSent) {
 +    int bleuOrder = 4;
 +    int hypLength = Regex.spaces.split(hypSent).length;
 +    HashMap<String, Integer> referenceNgramTable = BLEU.constructMaxRefCountTable(refSents,
 +        bleuOrder);
 +    HashMap<String, Integer> hypNgramTable = BLEU.constructNgramTable(hypSent, bleuOrder);
 +    return computeNgramMatches(hypLength, hypNgramTable, referenceNgramTable, bleuOrder);
 +  }
 +
 +  public static int[] computeNgramMatches(int hypLength, Map<String, Integer> hypNgramTable,
 +      Map<String, Integer> referenceNgramTable, int highestOrder) {
 +    int[] res = new int[highestOrder + 1];
 +    res[0] = hypLength;
 +    for (Entry<String, Integer> entry : hypNgramTable.entrySet()) {
 +      String ngram = entry.getKey();
 +      if (referenceNgramTable.containsKey(ngram)) {// delta function
 +        int ngramOrder = Regex.spaces.split(ngram).length;
 +        res[ngramOrder] += entry.getValue();
 +      }
 +    }
 +
 +    /*
 +    System.err.print("NGRAMS:");
 +    for (String ngram: hypNgramTable.keySet())
 +      System.err.print(" | " + ngram);
 +    System.err.println();
 +    System.err.print("REF:");
 +    for (String ngram: referenceNgramTable.keySet())
 +      System.err.print(" | " + ngram);
 +    System.err.println();
 +    System.err.print("COUNTS:");
 +    for (int i = 1; i <= 4; i++)
 +      System.err.print(" " + res[i]);
 +    System.err.println();
 +    */
 +
 +    return res;
 +  }
 +
 +  static public float[] computeLinearCorpusThetas(int numUnigramTokens, float unigramPrecision,
 +      float decayRatio) {
 +    float[] res = new float[5];
 +    res[0] = -1.0f / numUnigramTokens;
 +    for (int i = 1; i < 5; i++)
 +      res[i] = (1.0f / (4.0f * numUnigramTokens * unigramPrecision * (float) Math.pow(decayRatio,
 +          i - 1)));
 +
 +    float firstWeight = res[0];
 +    for (int i = 0; i < 5; i++)
 +      res[i] /= Math.abs(firstWeight);// normalize by first one
 +
 +    System.out.print("Normalized Thetas are: ");
 +    for (int i = 0; i < 5; i++)
 +      System.out.print(res[i] + " ");
 +    System.out.print("\n");
 +
 +    return res;
 +  }
 +
 +  public static final int maxOrder = 4;
 +
 +  /**
 +   * Computes BLEU statistics incurred by a rule. This is (a) all ngrams (n &lt;= 4) for terminal rules
 +   * and (b) all ngrams overlying boundary points between terminals in the rule and ngram state from
 +   * tail nodes.
 +   * 
 +   * There are four cases to handle:
 +   * <ul>
 +   * <li>only words
 +   * <li>a number of words followed by a nonterminal (left context of tail node)
 +   * <li>a nonterminal (right context of tail node) followed by one or more words
 +   * <li>two nonterminals (right context of tail node 1, left context of tail node 2)
 +   * </ul>
 +   * 
 +   * Of these, all but the first have a boundary point to consider.
 +   * 
 +   * @param edge the hyperedge whose rule contributes the BLEU statistics
 +   * @param spanPct the fraction of the input covered by the edge's span
 +   * @param references the reference to compute statistics against
 +   * @return the accumulated BLEU statistics
 +   */
 +  public static Stats compute(HyperEdge edge, float spanPct, References references) {
 +    Stats stats = new Stats();
 +    // TODO: this should not be the span width, but the real ref scaled to the span percentage
 +    stats.reflen = (int) (spanPct * references.reflen);
 +
 +    Rule rule = edge.getRule();
 +    if (rule != null) {
 +      int[] symbols = rule.getTarget();
 +
 +//      System.err.println(String.format("compute(%s)", rule));
 +      
-       ArrayList<Integer> currentNgram = new ArrayList<Integer>();
++      ArrayList<Integer> currentNgram = new ArrayList<>();
 +      int boundary = -1;
 +      int tailIndex = -1;
-       for (int i = 0; i < symbols.length; i++) {
-         if (symbols[i] < 0) {
++      for (int symbol : symbols) {
++        if (symbol < 0) {
 +          tailIndex++;
 +
 +          NgramDPState ngramState = null;
 +          try {
 +            ngramState = (NgramDPState) edge.getTailNodes().get(tailIndex).getDPState(0);
 +          } catch (ClassCastException e) {
-             throw new RuntimeException(String.format(
-                 "* FATAL: first state needs to be NgramDPState (found %s)", edge.getTailNodes()
-                     .get(tailIndex).getDPState(0).getClass()));
++            throw new RuntimeException(String
++                .format("* FATAL: first state needs to be NgramDPState (found %s)",
++                    edge.getTailNodes().get(tailIndex).getDPState(0).getClass()));
 +          }
-           
++
 +          // Compute ngrams overlapping with left context of tail node
 +          if (currentNgram.size() > 0) {
 +            boundary = currentNgram.size();
 +            for (int id : ngramState.getLeftLMStateWords())
 +              currentNgram.add(id);
 +
 +            // Compute the BLEU statistics
-             BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
++            Stats partStats = computeOverDivide(currentNgram, references, boundary);
 +            stats.add(partStats);
-             
- //            System.err.println("    " + Vocabulary.getWords(ngramState.getLeftLMStateWords()));
++
++            //            System.err.println("    " + Vocabulary.getWords(ngramState.getLeftLMStateWords()));
 +
 +            currentNgram.clear();
 +          }
-           
- //          System.err.println("    " + Vocabulary.getWords(ngramState.getRightLMStateWords()));
++
++          //          System.err.println("    " + Vocabulary.getWords(ngramState.getRightLMStateWords()));
 +
 +          // Accumulate ngrams from right context of tail node
 +          for (int id : ngramState.getRightLMStateWords())
 +            currentNgram.add(id);
 +
 +          boundary = currentNgram.size();
 +
 +        } else { // terminal symbol
-           currentNgram.add(symbols[i]);
++          currentNgram.add(symbol);
 +          stats.len++;
 +
- //          System.err.println("    " + Vocabulary.word(symbols[i]));
-           
++          //          System.err.println("    " + Vocabulary.word(symbols[i]));
++
 +          if (boundary != -1) {
-             BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
++            Stats partStats = computeOverDivide(currentNgram, references, boundary);
 +            stats.add(partStats);
 +
 +            // Shift off the context from the nonterminal's righthand side
 +            for (int j = 0; j < boundary; j++)
 +              currentNgram.remove(0);
 +            boundary = -1;
 +          }
 +        }
 +
 +        /*
 +         * At the end, we might have (a) nothing, (b) a sequence of words from a nonterminal's
 +         * righthand side, (c) a sequence of words from the rule, or (d) a sequence of words from a
 +         * nonterminal's righthand context and from the rule
 +         */
 +        if (currentNgram.size() > 0 && currentNgram.size() != boundary) { // skip cases (a) and (b)
-           BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
++          Stats partStats = computeOverDivide(currentNgram, references, boundary);
 +          stats.add(partStats);
 +        }
 +      }
 +    }
 +    return stats;
 +  }
 +
 +  /**
 +   * When computing BLEU statistics over a rule, we need to avoid adding in ngrams that are
 +   * exclusively contained inside tail nodes. This function accumulates all the eligible ngrams from
 +   * a string with respect to an optional boundary point, and then calls computeNgramMatches().
 +   * 
 +   * @param ngram the current set of ngrams
 +   * @param references contains the set of ngrams to compare against
 +   * @param boundary the boundary over which all ngrams must fall (-1 means ignore boundary)
 +   * @return the BLEU statistics over the eligible ngrams
 +   */
 +  private static Stats computeOverDivide(ArrayList<Integer> ngram, References references,
 +      int boundary) {
 +    
 +//    System.err.print(String.format("      BOUNDARY(%s, %d)", Vocabulary.getWords(ngram), boundary));
 +
-     HashMap<String, Integer> boundaryNgrams = new HashMap<String, Integer>();
++    HashMap<String, Integer> boundaryNgrams = new HashMap<>();
 +    for (int width = 1; width <= Math.min(maxOrder, ngram.size()); width++) {
 +      for (int i = 0; i < ngram.size() - width + 1; i++) {
 +        int j = i + width;
 +
 +        final List<Integer> piece = ngram.subList(i, j);
 +        if (boundary == -1 || (boundary > i && boundary < j)) {
 +          String ngramStr = Vocabulary.getWords(piece);
 +          if (!boundaryNgrams.containsKey(ngramStr))
 +            boundaryNgrams.put(ngramStr, 1);
 +          else // increment the existing count
 +            boundaryNgrams.put(ngramStr, boundaryNgrams.get(ngramStr) + 1);
 +        }
 +      }
 +    }
 +    
 +    /*
 +    System.err.print(" FOUND");
 +    for (String phr: boundaryNgrams.keySet())
 +      System.err.print(" | " + phr);
 +    System.err.println();
 +    */
 +
 +    BLEU.Stats result = new BLEU.Stats();
 +    int[] stats = BLEU.computeNgramMatches(0, boundaryNgrams, references.ngramCounts, maxOrder);
 +    System.arraycopy(stats, 1, result.counts, 0, maxOrder);
 +
 +    return result;
 +  }
 +
 +  public static class References {
 +    HashMap<String, Integer> ngramCounts;
 +    float reflen;
 +
 +    public References(String reference) {
 +      String[] refs = new String[1];
 +      refs[0] = reference;
 +      fill(refs);
 +    }
 +
 +    public References(String[] references) {
 +      fill(references);
 +    }
 +
 +    private void fill(String[] references) {
-       ngramCounts = new HashMap<String, Integer>();
++      ngramCounts = new HashMap<>();
 +      reflen = 0.0f;
-       for (int i = 0; i < references.length; i++) {
-         String[] ref = references[i].split(" ");
++      for (String reference : references) {
++        String[] ref = reference.split(" ");
 +        Ngram.getNgrams(ngramCounts, 1, maxOrder, ref);
 +        reflen += ref.length;
 +      }
 +      reflen /= references.length;
 +    }
 +  }
 +
 +  public static float score(Stats stats) {
 +    float score = 0f;
 +    float wt = 1.0f / maxOrder;
 +    float prec = 0;
 +    float smooth_factor = 1.0f;
 +    for (int t = 0; t < maxOrder && t < stats.len; t++) {
 +      if (stats.counts[t] > 0) {
 +        prec += wt * Math.log(stats.counts[t] * 1.0 / (stats.len - t));
 +      } else {
 +        smooth_factor *= 0.5;// TODO
 +        prec += wt * Math.log(smooth_factor / (stats.len - t));
 +      }
 +    }
 +    float bp = (stats.len >= stats.reflen) ? 1.0f : (float) Math.exp(1 - stats.reflen / stats.len);
 +    score = bp * (float) Math.exp(prec);
 +    
 +//    System.err.println(String.format("BLEU(%d %d %d %d / BP=%f) = %f", stats.counts[0], stats.counts[1], stats.counts[2], stats.counts[3], bp, score));
 +    return score;
 +  }
 +
 +  /**
 +   * Accumulated sufficient statistics for computing BLEU.
 +   */
 +  public static class Stats {
-     public int[] counts;
++    public final int[] counts;
 +    public float len;
 +    public float reflen;
 +
 +    public Stats() {
 +      counts = new int[4];
 +      len = 0.0f;
 +      reflen = 0.0f;
 +    }
 +
 +    public Stats(int[] counts, float len, float reflen) {
 +      this.counts = counts;
 +      this.len = len;
 +      this.reflen = reflen;
 +    }
 +
 +    public void add(Stats otherStats) {
 +      for (int i = 0; i < counts.length; i++)
 +        counts[i] += otherStats.counts[i];
 +      
 +      len += otherStats.len;
 +    }
 +  }
 +}
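
A sketch of the multiple-reference entry point above, with invented reference
and hypothesis strings; the default overload enables ngram clipping, uses
order 4, and scores against the average reference length:

    String[] refs = { "the cat sat on the mat", "a cat was sitting on the mat" };
    String hyp = "the cat sat on a mat";
    float bleu = BLEU.computeSentenceBleu(refs, hyp);
    System.out.println("sentence-level BLEU = " + bleu);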

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
index 5b0ae0f,0000000..76ba021
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
@@@ -1,766 -1,0 +1,768 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +import static org.apache.joshua.decoder.ff.tm.OwnerMap.getOwner;
 +import static org.apache.joshua.util.Constants.spaceSeparator;
 +
 +import java.io.BufferedWriter;
 +import java.io.File;
 +import java.io.FileNotFoundException;
 +import java.io.IOException;
 +import java.lang.reflect.Constructor;
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.Map.Entry;
 +import java.util.Set;
 +import java.util.concurrent.ArrayBlockingQueue;
 +import java.util.concurrent.BlockingQueue;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.FeatureFunction;
 +import org.apache.joshua.decoder.ff.FeatureMap;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.ff.PhraseModel;
 +import org.apache.joshua.decoder.ff.StatefulFF;
 +import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
 +import org.apache.joshua.decoder.ff.tm.Grammar;
 +import org.apache.joshua.decoder.ff.tm.OwnerId;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
 +import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
 +import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
 +import org.apache.joshua.decoder.io.TranslationRequestStream;
 +import org.apache.joshua.decoder.phrase.PhraseTable;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.FileUtility;
 +import org.apache.joshua.util.FormatUtils;
 +import org.apache.joshua.util.Regex;
 +import org.apache.joshua.util.io.LineReader;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +import com.google.common.base.Strings;
 +
 +/**
 + * This class handles decoder initialization and the complication introduced by multithreading.
 + *
 + * After initialization, the main entry point to the Decoder object is
 + * decodeAll(TranslationRequest), which returns a set of Translation objects wrapped in an iterable
 + * Translations object. It is important that we support multithreading both (a) across the sentences
 + * within a request and (b) across requests, in a round-robin fashion. This is done by maintaining a
 + * fixed sized concurrent thread pool. When a new request comes in, a RequestParallelizer thread is
 + * launched. This object iterates over the request's sentences, obtaining a thread from the
 + * thread pool, and using that thread to decode the sentence. If a decoding thread is not available,
 + * it will block until one is in a fair (FIFO) manner. RequestParallelizer thereby permits intra-request
 + * parallelization by separating out reading the input stream from processing the translated sentences,
 + * but also ensures that round-robin parallelization occurs, since RequestParallelizer uses the
 + * thread pool before translating each request.
 + *
 + * A decoding thread is handled by DecoderThread and launched from DecoderThreadRunner. The purpose
 + * of the runner is to record where to place the translated sentence when it is done (i.e., which
 + * Translations object). Translations itself is an iterator whose next() call blocks until the next
 + * translation is available.
 + *
 + * @author Matt Post post@cs.jhu.edu
 + * @author Zhifei Li, zhifei.work@gmail.com
 + * @author wren ng thornton wren@users.sourceforge.net
 + * @author Lane Schwartz dowobeha@users.sourceforge.net
 + */
 +public class Decoder {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(Decoder.class);
 +
 +  private final JoshuaConfiguration joshuaConfiguration;
 +
 +  public JoshuaConfiguration getJoshuaConfiguration() {
 +    return joshuaConfiguration;
 +  }
 +
 +  /*
 +   * Many of these objects themselves are global objects. We pass them in when constructing other
 +   * objects, so that they all share pointers to the same object. This is good because it reduces
 +   * overhead, but it can be problematic because of unseen dependencies (for example, in the
 +   * Vocabulary shared by language model, translation grammar, etc).
 +   */
 +  private final List<Grammar> grammars = new ArrayList<Grammar>();
 +  private final ArrayList<FeatureFunction> featureFunctions = new ArrayList<>();
 +  private Grammar customPhraseTable = null;
 +
 +  /* The feature weights. */
 +  public static FeatureVector weights;
 +
 +  public static int VERBOSE = 1;
 +
 +  private BlockingQueue<DecoderThread> threadPool = null;
 +
 +  // ===============================================================
 +  // Constructors
 +  // ===============================================================
 +
 +  /**
 +   * Constructor method that creates a new decoder using the specified configuration file.
 +   *
 +   * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
 +   * @param configFile name of configuration file.
 +   */
 +  public Decoder(JoshuaConfiguration joshuaConfiguration, String configFile) {
 +    this(joshuaConfiguration);
 +    this.initialize(configFile);
 +  }
 +
 +  /**
 +   * Factory method that creates a new decoder using the specified configuration file.
 +   *
 +   * @param configFile Name of configuration file.
 +   * @return a configured {@link org.apache.joshua.decoder.Decoder}
 +   */
 +  public static Decoder createDecoder(String configFile) {
 +    JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
 +    return new Decoder(joshuaConfiguration, configFile);
 +  }
 +
 +  /**
 +   * Constructs an uninitialized decoder for use in testing.
 +   * <p>
 +   * This method is private because it should only ever be called by the
 +   * {@link #getUninitalizedDecoder()} method to provide an uninitialized decoder for use in
 +   * testing.
 +   */
 +  private Decoder(JoshuaConfiguration joshuaConfiguration) {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    this.threadPool = new ArrayBlockingQueue<DecoderThread>(
 +        this.joshuaConfiguration.num_parallel_decoders, true);
++    this.customPhraseTable = null;
++    
++    resetGlobalState();
 +  }
 +
 +  /**
 +   * Gets an uninitialized decoder for use in testing.
 +   * <p>
 +   * This method is called by unit tests or any outside packages (e.g., MERT) relying on the
 +   * decoder.
 +   * @param joshuaConfiguration a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
 +   * @return an uninitialized decoder for use in testing
 +   */
 +  static public Decoder getUninitalizedDecoder(JoshuaConfiguration joshuaConfiguration) {
 +    return new Decoder(joshuaConfiguration);
 +  }
 +
 +  // ===============================================================
 +  // Public Methods
 +  // ===============================================================
 +
 +  /**
 +   * This class is responsible for getting sentences from the TranslationRequestStream and
 +   * procuring a DecoderThreadRunner to translate each of them. Each call to
 +   * decodeAll(TranslationRequestStream) launches a thread that will read the request's sentences,
 +   * obtain a DecoderThread to translate them, and then place each Translation in the appropriate
 +   * place.
 +   *
 +   * @author Matt Post <po...@cs.jhu.edu>
 +   *
 +   */
 +  private class RequestParallelizer extends Thread {
 +    /* Source of sentences to translate. */
 +    private final TranslationRequestStream request;
 +
 +    /* Where to put translated sentences. */
 +    private final Translations response;
 +
 +    RequestParallelizer(TranslationRequestStream request, Translations response) {
 +      this.request = request;
 +      this.response = response;
 +    }
 +
 +    @Override
 +    public void run() {
 +      /*
 +       * Repeatedly get an input sentence, wait for a DecoderThread, and then start a new thread to
 +       * translate the sentence. We start a new thread (via DecoderThreadRunner) as opposed to
 +       * blocking, so that the RequestParallelizer can go on to the next sentence in this request,
 +       * which allows parallelization across the sentences of the request.
 +       */
 +      for (;;) {
 +        Sentence sentence = request.next();
 +
 +        if (sentence == null) {
 +          response.finish();
 +          break;
 +        }
 +
 +        // This will block until a DecoderThread becomes available.
 +        DecoderThread thread = Decoder.this.getThread();
 +        new DecoderThreadRunner(thread, sentence, response).start();
 +      }
 +    }
 +
 +  }
 +
 +  /**
 +   * Retrieve a thread from the thread pool, blocking until one is available. The blocking occurs in
 +   * a fair fashion (i.e., FIFO across requests).
 +   *
 +   * @return a thread that can be used for decoding.
 +   */
 +  public DecoderThread getThread() {
 +    try {
 +      return threadPool.take();
 +    } catch (InterruptedException e) {
 +      Thread.currentThread().interrupt();
 +      LOG.error("Interrupted while waiting for a decoder thread", e);
 +    }
 +    return null;
 +  }
 +
 +  /**
 +   * This class handles running a DecoderThread (which takes care of the actual translation of an
 +   * input Sentence, returning a Translation object when it's done). This is done in a thread so as
 +   * not to tie up the RequestParallelizer that launched it, freeing it to go on to the next
 +   * sentence in the TranslationRequest, in turn permitting parallelization across the sentences of
 +   * a request.
 +   *
 +   * When the decoder thread is finished, the Translation object is placed in the correct place in
 +   * the corresponding Translations object that was returned to the caller of
 +   * Decoder.decodeAll(TranslationRequestStream).
 +   *
 +   * @author Matt Post <po...@cs.jhu.edu>
 +   */
 +  private class DecoderThreadRunner extends Thread {
 +
 +    private final DecoderThread decoderThread;
 +    private final Sentence sentence;
 +    private final Translations translations;
 +
 +    DecoderThreadRunner(DecoderThread thread, Sentence sentence, Translations translations) {
 +      this.decoderThread = thread;
 +      this.sentence = sentence;
 +      this.translations = translations;
 +    }
 +
 +    @Override
 +    public void run() {
 +      /*
 +       * Process any found metadata.
 +       */
 +      
 +      /*
 +       * Use the thread to translate the sentence. Then record the translation with the
 +       * corresponding Translations object, and return the thread to the pool.
 +       */
 +      try {
 +        Translation translation = decoderThread.translate(this.sentence);
 +        translations.record(translation);
 +
 +        /*
 +         * This is crucial! It's what makes the thread available for the next sentence to be
 +         * translated.
 +         */
 +        threadPool.put(decoderThread);
 +      } catch (Exception e) {
 +        throw new RuntimeException(String.format(
 +            "Input %d: FATAL UNCAUGHT EXCEPTION: %s", sentence.id(), e.getMessage()), e);
 +        //        translations.record(new Translation(sentence, null, featureFunctions, joshuaConfiguration));
 +      }
 +    }
 +  }
 +
 +  /**
 +   * This function is the main entry point into the decoder. It translates all the sentences in a
 +   * (possibly boundless) set of input sentences. Each request launches its own thread to read the
 +   * sentences of the request.
 +   *
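 +   * A minimal caller sketch (variable names hypothetical). The returned Translations object
 +   * fills asynchronously, so results can be consumed as they arrive:
 +   *
 +   * <pre>
 +   *   Translations translations = decoder.decodeAll(request);
 +   *   for (Translation translation : translations)
 +   *     System.out.println(translation);
 +   * </pre>
 +   *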
 +   * @param request the populated {@link org.apache.joshua.decoder.io.TranslationRequestStream}
 +   * @throws IOException if there is an error with the input stream or writing the output
 +   * @return an iterable, asynchronously-filled list of Translations
 +   */
 +  public Translations decodeAll(TranslationRequestStream request) throws IOException {
 +    Translations translations = new Translations(request);
 +
 +    /* Start a thread to handle requests on the input stream */
 +    new RequestParallelizer(request, translations).start();
 +
 +    return translations;
 +  }
 +
 +  /**
 +   * Decodes a single sentence and returns its translation.
 +   *
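 +   * An illustrative call (Sentence constructor arguments as used elsewhere in this codebase):
 +   *
 +   * <pre>
 +   *   Translation translation = decoder.decode(new Sentence("guten tag", 0, joshuaConfiguration));
 +   * </pre>
 +   *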
 +   * @param sentence the {@link org.apache.joshua.decoder.segment_file.Sentence} to translate
 +   * @return the resulting {@link org.apache.joshua.decoder.Translation}
 +   */
 +  public Translation decode(Sentence sentence) {
 +    // Get a thread.
 +
 +    try {
 +      DecoderThread thread = threadPool.take();
 +      Translation translation = thread.translate(sentence);
 +      threadPool.put(thread);
 +
 +      return translation;
 +
 +    } catch (InterruptedException e) {
 +      Thread.currentThread().interrupt();
 +      LOG.error("Interrupted while decoding sentence {}", sentence.id(), e);
 +    }
 +
 +    return null;
 +  }
 +
 +  /**
 +   * Performs a clean shutdown of the Decoder, resetting all static variables so that any
 +   * Decoder instance created afterwards gets a fresh start.
 +   */
 +  public void cleanUp() {
 +    // shut down DecoderThreads
 +    for (DecoderThread thread : threadPool) {
 +      try {
 +        thread.join();
 +      } catch (InterruptedException e) {
 +        Thread.currentThread().interrupt();
 +        LOG.error("Interrupted while joining decoder thread", e);
 +      }
 +    }
 +    resetGlobalState();
 +  }
 +
 +  public static void resetGlobalState() {
 +    // clear/reset static variables
 +    OwnerMap.clear();
 +    FeatureMap.clear();
 +    Vocabulary.clear();
 +    Vocabulary.unregisterLanguageModels();
 +    LanguageModelFF.resetLmIndex();
 +    StatefulFF.resetGlobalStateIndex();
 +  }
 +
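 +  /*
 +   * Rewrites a config template, replacing the trailing weight on each model line. As an
 +   * illustrative (hypothetical) example, a template line "lm 5 lm.gz 1.0" would be copied
 +   * with all fields intact except the final weight (1.0), which is replaced by the next
 +   * entry of newWeights; comment lines, empty lines, and parameter lines (those containing
 +   * '=') pass through unchanged.
 +   */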
 +  public static void writeConfigFile(double[] newWeights, String template, String outputFile,
 +      String newDiscriminativeModel) {
 +    try {
 +      int columnID = 0;
 +
 +      BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
 +      LineReader reader = new LineReader(template);
 +      try {
 +        for (String line : reader) {
 +          line = line.trim();
-           if (Regex.commentOrEmptyLine.matches(line) || line.indexOf("=") != -1) {
++          if (Regex.commentOrEmptyLine.matches(line) || line.contains("=")) {
 +            // comment, empty line, or parameter lines: just copy
 +            writer.write(line);
 +            writer.newLine();
 +
 +          } else { // models: replace the weight
 +            String[] fds = Regex.spaces.split(line);
 +            StringBuilder newSent = new StringBuilder();
 +            if (!Regex.floatingNumber.matches(fds[fds.length - 1])) {
 +              throw new IllegalArgumentException("last field is not a number; the field is: "
 +                  + fds[fds.length - 1]);
 +            }
 +
 +            if (newDiscriminativeModel != null && "discriminative".equals(fds[0])) {
 +              newSent.append(fds[0]).append(' ');
 +              newSent.append(newDiscriminativeModel).append(' '); // change the file name
 +              for (int i = 2; i < fds.length - 1; i++) {
 +                newSent.append(fds[i]).append(' ');
 +              }
 +            } else {// regular
 +              for (int i = 0; i < fds.length - 1; i++) {
 +                newSent.append(fds[i]).append(' ');
 +              }
 +            }
 +            if (newWeights != null)
 +              newSent.append(newWeights[columnID++]);// change the weight
 +            else
 +              newSent.append(fds[fds.length - 1]);// do not change
 +
 +            writer.write(newSent.toString());
 +            writer.newLine();
 +          }
 +        }
 +      } finally {
 +        reader.close();
 +        writer.close();
 +      }
 +
 +      if (newWeights != null && columnID != newWeights.length) {
 +        throw new IllegalArgumentException("number of models does not match number of weights");
 +      }
 +
 +    } catch (IOException e) {
 +      e.printStackTrace();
 +    }
 +  }
 +
 +  // ===============================================================
 +  // Initialization Methods
 +  // ===============================================================
 +
 +  /**
 +   * Initialize all parts of the JoshuaDecoder.
 +   *
 +   * @param configFile File containing configuration options
 +   * @return An initialized decoder
 +   */
 +  public Decoder initialize(String configFile) {
 +    try {
 +
 +      long pre_load_time = System.currentTimeMillis();
 +
 +      /* Weights can be listed in a separate file (denoted by parameter "weights-file") or directly
 +       * in the Joshua config file. Config file values take precedence.
 +       */
 +      this.readWeights(joshuaConfiguration.weights_file);
 +
 +      /* Add command-line-passed weights to the weights array for processing below */
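 +      // Illustrative format (feature names hypothetical): weight_overwrite might contain
 +      // "lm_0 0.5 OOVPenalty -100", i.e., alternating feature names and values.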
 +      if (!Strings.isNullOrEmpty(joshuaConfiguration.weight_overwrite)) {
 +        String[] tokens = joshuaConfiguration.weight_overwrite.split("\\s+");
 +        for (int i = 0; i < tokens.length; i += 2) {
 +          String feature = tokens[i];
 +          float value = Float.parseFloat(tokens[i+1]);
 +
 +          if (joshuaConfiguration.moses)
 +            feature = demoses(feature);
 +
 +          joshuaConfiguration.weights.add(String.format("%s %s", feature, tokens[i+1]));
 +          LOG.info("COMMAND LINE WEIGHT: {} -> {}", feature, value);
 +        }
 +      }
 +
 +      /* Read the weights found in the config file */
 +      for (String pairStr: joshuaConfiguration.weights) {
 +        String[] pair = pairStr.split("\\s+");
 +
 +        /* Sanity check for old-style unsupported feature invocations. */
 +        if (pair.length != 2) {
-           StringBuilder errMsg = new StringBuilder();
-           errMsg.append("FATAL: Invalid feature weight line found in config file.\n");
-           errMsg.append(String.format("The line was '%s'\n", pairStr));
-           errMsg.append("You might be using an old version of the config file that is no longer supported\n");
-           errMsg.append("Check joshua.apache.org or email dev@joshua.apache.org for help\n");
-           errMsg.append("Code = " + 17);
-           throw new RuntimeException(errMsg.toString());
++          String errMsg = "FATAL: Invalid feature weight line found in config file.\n" +
++              String.format("The line was '%s'\n", pairStr) +
++              "You might be using an old version of the config file that is no longer supported\n" +
++              "Check joshua.apache.org or email dev@joshua.apache.org for help\n" +
++              "Code = " + 17;
++          throw new RuntimeException(errMsg);
 +        }
 +
 +        weights.add(hashFeature(pair[0]), Float.parseFloat(pair[1]));
 +      }
 +
 +      LOG.info("Read {} weights", weights.size());
 +
 +      // Do this before loading the grammars and the LM.
 +      this.featureFunctions.clear();
 +
 +      // Initialize and load grammars. This must happen first, since the vocab gets defined by
 +      // the packed grammar (if any)
 +      this.initializeTranslationGrammars();
 +      LOG.info("Grammar loading took: {} seconds.",
 +          (System.currentTimeMillis() - pre_load_time) / 1000);
 +
 +      // Initialize the features: requires that LM model has been initialized.
 +      this.initializeFeatureFunctions();
 +
 +      // This is mostly for compatibility with the Moses tuning script
 +      if (joshuaConfiguration.show_weights_and_quit) {
 +        for (Entry<Integer, Float> entry : weights.entrySet()) {
 +          System.out.println(String.format("%s=%.5f", FeatureMap.getFeature(entry.getKey()), entry.getValue()));
 +        }
 +        // TODO (fhieber): this functionality should not live in the main Decoder class, nor simply call System.exit().
 +        System.exit(0);
 +      }
 +
 +      // Sort the TM grammars (needed to do cube pruning)
 +      if (joshuaConfiguration.amortized_sorting) {
 +        LOG.info("Grammar sorting happening lazily on-demand.");
 +      } else {
 +        long pre_sort_time = System.currentTimeMillis();
 +        for (Grammar grammar : this.grammars) {
 +          grammar.sortGrammar(this.featureFunctions);
 +        }
 +        LOG.info("Grammar sorting took {} seconds.",
 +            (System.currentTimeMillis() - pre_sort_time) / 1000);
 +      }
 +
 +      // Create the threads
 +      for (int i = 0; i < joshuaConfiguration.num_parallel_decoders; i++) {
 +        this.threadPool.put(new DecoderThread(this.grammars, Decoder.weights,
 +            this.featureFunctions, joshuaConfiguration));
 +      }
 +    } catch (IOException | InterruptedException e) {
 +      LOG.warn(e.getMessage(), e);
 +    }
 +
 +    return this;
 +  }
 +
 +  /**
 +   * Initializes the translation grammars. Retained for backward compatibility.
 +   *
 +   * @throws IOException if a grammar file cannot be read
 +   */
 +  private void initializeTranslationGrammars() throws IOException {
 +
 +    if (!joshuaConfiguration.tms.isEmpty()) {
 +
 +      // collect packedGrammars to check if they use a shared vocabulary
 +      final List<PackedGrammar> packed_grammars = new ArrayList<>();
 +
 +      // tm = TYPE {thrax/hiero,packed,samt,moses,phrase} plus named args (-owner, -maxspan, -path)
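 +      // An illustrative (hypothetical) line: "hiero -owner pt -maxspan 20 -path grammar.packed"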
 +      for (String tmLine : joshuaConfiguration.tms) {
 +
 +        String type = tmLine.substring(0, tmLine.indexOf(' '));
 +        String[] args = tmLine.substring(tmLine.indexOf(' ')).trim().split("\\s+");
 +        HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
 +
 +        String owner = parsedArgs.get("owner");
 +        int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
 +        String path = parsedArgs.get("path");
 +
 +        Grammar grammar = null;
 +        if (!type.equals("moses") && !type.equals("phrase")) {
 +          if (new File(path).isDirectory()) {
 +            try {
 +              PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
 +              packed_grammars.add(packed_grammar);
 +              grammar = packed_grammar;
 +            } catch (FileNotFoundException e) {
 +              String msg = String.format("Couldn't load packed grammar from '%s'. ", path)
 +                  + "Perhaps it doesn't exist, or it may be an old packed file format.";
 +              throw new RuntimeException(msg, e);
 +            }
 +          } else {
 +            // thrax, hiero, samt
 +            grammar = new MemoryBasedBatchGrammar(type, path, owner,
 +                joshuaConfiguration.default_non_terminal, span_limit, joshuaConfiguration);
 +          }
 +
 +        } else {
 +
 +          joshuaConfiguration.search_algorithm = "stack";
 +          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
 +        }
 +
 +        this.grammars.add(grammar);
 +      }
 +
 +      checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
 +
 +    } else {
 +      LOG.warn("no grammars supplied!  Supplying dummy glue grammar.");
 +      MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration, -1);
 +      glueGrammar.addGlueRules(featureFunctions);
 +      this.grammars.add(glueGrammar);
 +    }
 +    
 +    /* Add the grammar for custom entries */
 +    if (joshuaConfiguration.search_algorithm.equals("stack"))
 +      this.customPhraseTable = new PhraseTable("custom", joshuaConfiguration);
 +    else
 +      this.customPhraseTable = new MemoryBasedBatchGrammar("custom", joshuaConfiguration, 20);
 +    this.grammars.add(this.customPhraseTable);
 +    
 +    /* Create an epsilon-deleting grammar */
 +    if (joshuaConfiguration.lattice_decoding) {
 +      LOG.info("Creating an epsilon-deleting grammar");
 +      MemoryBasedBatchGrammar latticeGrammar = new MemoryBasedBatchGrammar("lattice", joshuaConfiguration, -1);
 +      HieroFormatReader reader = new HieroFormatReader(OwnerMap.register("lattice"));
 +
 +      String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
 +      String defaultNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
 +
 +      String ruleString = String.format("[%s] ||| [%s,1] <eps> ||| [%s,1] ||| ",
 +          goalNT, goalNT, defaultNT);
 +
 +      Rule rule = reader.parseLine(ruleString);
 +      latticeGrammar.addRule(rule);
 +      rule.estimateRuleCost(featureFunctions);
 +
 +      this.grammars.add(latticeGrammar);
 +    }
 +
 +    /* Now create a feature function for each owner */
-     final Set<OwnerId> ownersSeen = new HashSet<OwnerId>();
++    final Set<OwnerId> ownersSeen = new HashSet<>();
 +
 +    for (Grammar grammar: this.grammars) {
 +      OwnerId owner = grammar.getOwner();
 +      if (!ownersSeen.contains(owner)) {
 +        this.featureFunctions.add(
 +            new PhraseModel(
 +                weights, new String[] { "tm", "-owner", getOwner(owner) }, joshuaConfiguration, grammar));
 +        ownersSeen.add(owner);
 +      }
 +    }
 +
 +    LOG.info("Memory used {} MB",
 +        ((Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0));
 +  }
 +
 +  /**
 +   * Checks if multiple packedGrammars have the same vocabulary by comparing their vocabulary file checksums.
 +   */
 +  private static void checkSharedVocabularyChecksumsForPackedGrammars(final List<PackedGrammar> packed_grammars) {
 +    String previous_checksum = "";
 +    for (PackedGrammar grammar : packed_grammars) {
 +      final String checksum = grammar.computeVocabularyChecksum();
 +      if (previous_checksum.isEmpty()) {
 +        previous_checksum = checksum;
 +      } else {
 +        if (!checksum.equals(previous_checksum)) {
 +          throw new RuntimeException(
 +              "Trying to load multiple packed grammars with different vocabularies!" +
 +                  "Have you packed them jointly?");
 +        }
 +        previous_checksum = checksum;
 +      }
 +    }
 +  }
 +
 +  /*
 +   * This function reads the weights for the model. Feature names and their weights are listed one
 +   * per line in the following format:
 +   * 
 +   * FEATURE_NAME WEIGHT
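 +   *
 +   * For example (feature names illustrative):
 +   *
 +   *   lm_0 0.5
 +   *   OOVPenalty -100.0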
 +   */
 +  private void readWeights(String fileName) {
 +    Decoder.weights = new FeatureVector(5);
 +
 +    if (fileName.isEmpty())
 +      return;
 +
 +    try {
 +      LineReader lineReader = new LineReader(fileName);
 +
 +      for (String line : lineReader) {
 +        line = line.replaceAll(spaceSeparator, " ");
 +
 +        if (line.equals("") || line.startsWith("#") || line.startsWith("//")
 +            || line.indexOf(' ') == -1)
 +          continue;
 +
 +        String[] tokens = line.split(spaceSeparator);
 +        String feature = tokens[0];
 +        float value = Float.parseFloat(tokens[1]);
 +
 +        // Kludge for compatibility with Moses tuners
 +        if (joshuaConfiguration.moses) {
 +          feature = demoses(feature);
 +        }
 +
 +        weights.add(hashFeature(feature), value);
 +      }
 +    } catch (IOException ioe) {
 +      throw new RuntimeException(ioe);
 +    }
 +    LOG.info("Read {} weights from file '{}'", weights.size(), fileName);
 +  }
 +
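 +  /*
 +   * Strips Moses-style decoration from a feature name. Illustratively, "OOV_Penalty="
 +   * becomes "OOVPenalty", and "tm-pt-0" becomes "tm_pt_0".
 +   */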
 +  private String demoses(String feature) {
 +    if (feature.endsWith("="))
 +      feature = feature.replace("=", "");
 +    if (feature.equals("OOV_Penalty"))
 +      feature = "OOVPenalty";
 +    else if (feature.startsWith("tm-") || feature.startsWith("lm-"))
 +      feature = feature.replace("-",  "_");
 +    return feature;
 +  }
 +
 +  /**
 +   * Feature functions are instantiated with a line of the form
 +   *
 +   * <pre>
 +   *   FEATURE OPTIONS
 +   * </pre>
 +   *
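 +   * For instance, the single token "OOVPenalty" would instantiate the OOV penalty feature
 +   * with no options; any further whitespace-separated tokens on the line are passed to the
 +   * feature's constructor as arguments.
 +   *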
 +   * Weights for features are listed separately.
 +   *
 +   * @throws IOException
 +   *
 +   */
 +  private void initializeFeatureFunctions() throws IOException {
 +
 +    for (String featureLine : joshuaConfiguration.features) {
 +      // line starts with NAME, followed by args
 +      // 1. create new class named NAME, pass it config, weights, and the args
 +
 +      String[] fields = featureLine.split("\\s+");
 +      String featureName = fields[0];
 +      
 +      try {
 +        
 +        Class<?> clas = getFeatureFunctionClass(featureName);
 +        Constructor<?> constructor = clas.getConstructor(FeatureVector.class,
 +            String[].class, JoshuaConfiguration.class);
 +        FeatureFunction feature = (FeatureFunction) constructor.newInstance(weights, fields, joshuaConfiguration);
 +        this.featureFunctions.add(feature);
 +        
 +      } catch (Exception e) {
 +        throw new RuntimeException(String.format("Unable to instantiate feature function '%s'!", featureLine), e); 
 +      }
 +    }
 +
 +    for (FeatureFunction feature : featureFunctions) {
 +      LOG.info("FEATURE: {}", feature.logString());
 +    }
 +  }
 +
 +  /**
 +   * Searches a list of predefined paths for classes, and returns the first one found. Meant for
 +   * instantiating feature functions.
 +   *
 +   * @param featureName the unqualified feature name from the config; for example, "OOVPenalty"
 +   *          would be tried as org.apache.joshua.decoder.ff.OOVPenalty, then as OOVPenaltyFF
 +   * @return the class, found in one of the search paths, or null if no class was found
 +   */
 +  private Class<?> getFeatureFunctionClass(String featureName) {
 +    Class<?> clas = null;
 +
 +    String[] packages = { "org.apache.joshua.decoder.ff", "org.apache.joshua.decoder.ff.lm", "org.apache.joshua.decoder.ff.phrase" };
 +    for (String path : packages) {
 +      try {
 +        clas = Class.forName(String.format("%s.%s", path, featureName));
 +        break;
 +      } catch (ClassNotFoundException e) {
 +        try {
 +          clas = Class.forName(String.format("%s.%sFF", path, featureName));
 +          break;
 +        } catch (ClassNotFoundException e2) {
 +          // do nothing
 +        }
 +      }
 +    }
 +    return clas;
 +  }
 +  
 +  /**
 +   * Adds a rule to the custom grammar.  
 +   * 
 +   * @param rule the rule to add
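 +   *
 +   * An illustrative use (rule string hypothetical; syntax as accepted by HieroFormatReader):
 +   *
 +   * <pre>
 +   *   HieroFormatReader reader = new HieroFormatReader(OwnerMap.register("custom"));
 +   *   decoder.addCustomRule(reader.parseLine("[X] ||| hallo ||| hello ||| 1.0"));
 +   * </pre>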
 +   */
 +  public void addCustomRule(Rule rule) {
 +    customPhraseTable.addRule(rule);
 +    rule.estimateRuleCost(featureFunctions);
 +  }
 +
 +  public Grammar getCustomPhraseTable() {
 +    return customPhraseTable;
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
index d095e8d,0000000..b570d5f
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@@ -1,201 -1,0 +1,201 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import java.io.IOException;
 +import java.util.ArrayList;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.chart_parser.Chart;
 +import org.apache.joshua.decoder.ff.FeatureFunction;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.ff.SourceDependentFF;
 +import org.apache.joshua.decoder.ff.tm.Grammar;
 +import org.apache.joshua.decoder.hypergraph.ForestWalker;
 +import org.apache.joshua.decoder.hypergraph.GrammarBuilderWalkerFunction;
 +import org.apache.joshua.decoder.hypergraph.HyperGraph;
 +import org.apache.joshua.decoder.phrase.Stacks;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * This class handles decoding of individual Sentence objects (which can represent plain sentences
 + * or lattices). A single sentence can be decoded by a call to translate() and, if an InputHandler
 + * is used, many sentences can be decoded in a thread-safe manner via a single call to
 + * translateAll(), which continually queries the InputHandler for sentences until they have all been
 + * consumed and translated.
 + * 
 + * The DecoderFactory class is responsible for launching the threads.
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + * @author Zhifei Li, zhifei.work@gmail.com
 + */
 +public class DecoderThread extends Thread {
 +  private static final Logger LOG = LoggerFactory.getLogger(DecoderThread.class);
 +
 +  private final JoshuaConfiguration joshuaConfiguration;
 +  /*
 +   * these variables may be the same across all threads (e.g., just copy from DecoderFactory), or
 +   * differ from thread to thread
 +   */
 +  private final List<Grammar> allGrammars;
 +  private final List<FeatureFunction> featureFunctions;
 +
 +
 +  // ===============================================================
 +  // Constructor
 +  // ===============================================================
 +  public DecoderThread(List<Grammar> grammars, FeatureVector weights,
 +      List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) throws IOException {
 +
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    this.allGrammars = grammars;
 +
-     this.featureFunctions = new ArrayList<FeatureFunction>();
++    this.featureFunctions = new ArrayList<>();
 +    for (FeatureFunction ff : featureFunctions) {
 +      if (ff instanceof SourceDependentFF) {
 +        this.featureFunctions.add(((SourceDependentFF) ff).clone());
 +      } else {
 +        this.featureFunctions.add(ff);
 +      }
 +    }
 +  }
 +
 +  // ===============================================================
 +  // Methods
 +  // ===============================================================
 +
 +  @Override
 +  public void run() {
 +    // Nothing to do but wait.
 +  }
 +
 +  /**
 +   * Translate a sentence.
 +   * 
 +   * @param sentence The sentence to be translated.
 +   * @return the sentence {@link org.apache.joshua.decoder.Translation}
 +   */
 +  public Translation translate(Sentence sentence) {
 +
 +    LOG.info("Input {}: {}", sentence.id(), sentence.fullSource());
 +
 +    if (sentence.target() != null)
 +      LOG.info("Input {}: Constraining to target sentence '{}'",
 +          sentence.id(), sentence.target());
 +
 +    // skip blank sentences
 +    if (sentence.isEmpty()) {
 +      LOG.info("Translation {}: Translation took 0 seconds", sentence.id());
 +      return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
 +    }
 +
 +    long startTime = System.currentTimeMillis();
 +
 +    Grammar[] grammars = allGrammars.toArray(new Grammar[allGrammars.size()]);
 +
 +    if (joshuaConfiguration.segment_oovs)
 +      sentence.segmentOOVs(grammars);
 +
 +    /*
 +     * Joshua supports (as of September 2014) both phrase-based and hierarchical decoding. Here
 +     * we build the appropriate chart. The output of both systems is a hypergraph, which is then
 +     * used for further processing (e.g., k-best extraction).
 +     */
 +    HyperGraph hypergraph = null;
 +    try {
 +
 +      if (joshuaConfiguration.search_algorithm.equals("stack")) {
 +        Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
 +
 +        hypergraph = stacks.search();
 +      } else {
 +        /* Seeding: the chart only sees the grammars, not the factories */
 +        Chart chart = new Chart(sentence, this.featureFunctions, grammars,
 +            joshuaConfiguration.goal_symbol, joshuaConfiguration);
 +
 +        hypergraph = (joshuaConfiguration.use_dot_chart)
 +            ? chart.expand()
 +            : chart.expandSansDotChart();
 +      }
 +
 +    } catch (java.lang.OutOfMemoryError e) {
 +      LOG.error("Input {}: out of memory", sentence.id());
 +      hypergraph = null;
 +    }
 +
 +    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
 +    LOG.info("Input {}: Translation took {} seconds", sentence.id(), seconds);
 +    LOG.info("Input {}: Memory used is {} MB", sentence.id(), (Runtime
 +        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
 +
 +    /* Return the translation unless we're doing synchronous parsing. */
 +    if (!joshuaConfiguration.parse || hypergraph == null) {
 +      return new Translation(sentence, hypergraph, featureFunctions, joshuaConfiguration);
 +    }
 +
 +    /*****************************************************************************************/
 +
 +    /*
 +     * Synchronous parsing.
 +     * 
 +     * Step 1. Traverse the hypergraph to create a grammar for the second-pass parse.
 +     */
 +    Grammar newGrammar = getGrammarFromHyperGraph(joshuaConfiguration.goal_symbol, hypergraph);
 +    newGrammar.sortGrammar(this.featureFunctions);
 +    long sortTime = System.currentTimeMillis();
 +    LOG.info("Sentence {}: New grammar has {} rules.", sentence.id(),
 +        newGrammar.getNumRules());
 +
 +    /* Step 2. Create a new chart and parse with the instantiated grammar. */
 +    Grammar[] newGrammarArray = new Grammar[] { newGrammar };
 +    Sentence targetSentence = new Sentence(sentence.target(), sentence.id(), joshuaConfiguration);
 +    Chart chart = new Chart(targetSentence, featureFunctions, newGrammarArray, "GOAL", joshuaConfiguration);
 +    int goalSymbol = GrammarBuilderWalkerFunction.goalSymbol(hypergraph);
 +    String goalSymbolString = Vocabulary.word(goalSymbol);
 +    LOG.info("Sentence {}: goal symbol is {} ({}).", sentence.id(),
 +        goalSymbolString, goalSymbol);
 +    chart.setGoalSymbolID(goalSymbol);
 +
 +    /* Parsing */
 +    HyperGraph englishParse = chart.expand();
 +    long secondParseTime = System.currentTimeMillis();
 +    LOG.info("Sentence {}: Finished second chart expansion ({} seconds).",
 +        sentence.id(), (secondParseTime - sortTime) / 1000);
 +    LOG.info("Sentence {} total time: {} seconds.\n", sentence.id(),
 +        (secondParseTime - startTime) / 1000);
 +    LOG.info("Memory used after sentence {} is {} MB", sentence.id(), (Runtime
 +        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
 +    return new Translation(sentence, englishParse, featureFunctions, joshuaConfiguration); // or do something else
 +  }
 +
 +  private Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
 +    GrammarBuilderWalkerFunction f = new GrammarBuilderWalkerFunction(goal, joshuaConfiguration, "pt");
 +    ForestWalker walker = new ForestWalker();
 +    walker.walk(hg.goalNode, f);
 +    return f.getGrammar();
 +  }
 +}