You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:35 UTC

[24/60] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
new file mode 100644
index 0000000..ee22b94
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Scanner;
+
+import javax.swing.DefaultListModel;
+import javax.swing.JFrame;
+import javax.swing.JList;
+import javax.swing.JScrollPane;
+import javax.swing.JTextField;
+import javax.swing.ListSelectionModel;
+import javax.swing.event.DocumentEvent;
+import javax.swing.event.DocumentListener;
+import javax.swing.event.ListSelectionEvent;
+import javax.swing.event.ListSelectionListener;
+
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+import org.apache.joshua.util.io.LineReader;
+
+public class Browser {
+
+  /**
+   * The list shown in the chooser frame. It has one entry per source sentence, and each entry
+   * displays the reference translation of that sentence.
+   */
+  private static JList oneBestList;
+
+  /**
+   * The text field holding the substring to search for among the list entries.
+   */
+  private static JTextField searchBox;
+
+  /**
+   * The frames that display derivation trees, one frame per n-best file.
+   */
+  private static List<DerivationTreeFrame> activeFrame;
+
+  /**
+   * The source sentence, reference, and translations of every input sentence, in file order.
+   */
+  private static List<TranslationInfo> translations;
+
+  /**
+   * Default width of the chooser frame.
+   */
+  private static final int DEFAULT_WIDTH = 640;
+
+  /**
+   * Default height of the chooser frame.
+   */
+  private static final int DEFAULT_HEIGHT = 480;
+
+  /**
+   * List of colors to be used in derivation trees
+   */
+  static final Color[] dataSetColors = { Color.red, Color.orange, Color.blue, Color.green };
+
+  /**
+   * Reads the input files and opens the chooser frame.
+   *
+   * @param argv the path to the source file, the path to the reference file, and zero or more
+   *        paths to n-best files
+   * @throws IOException if one of the input files cannot be read
+   */
+  public static void main(String[] argv) throws IOException {
+    if (argv.length < 2) {
+      // The source and reference paths are mandatory: without them the readers below would
+      // end in a NullPointerException when opening a null path.
+      System.err.println("usage: Browser <source file> <reference file> [n-best file ...]");
+      return;
+    }
+    final String sourcePath = argv[0];
+    final String referencePath = argv[1];
+    final String[] translationPaths = Arrays.copyOfRange(argv, 2, argv.length);
+
+    translations = new ArrayList<TranslationInfo>();
+    readSourcesFromPath(sourcePath);
+    readReferencesFromPath(referencePath);
+    for (String tp : translationPaths) {
+      readTranslationsFromPath(tp);
+    }
+    initializeChooserFrame();
+  }
+
+  /**
+   * Creates one TranslationInfo per line of the source file. Each sentence is wrapped in
+   * "&lt;s&gt;" and "&lt;/s&gt;" markers before it is stored.
+   */
+  private static void readSourcesFromPath(String path) throws IOException {
+    for (String line : new LineReader(path)) {
+      TranslationInfo ti = new TranslationInfo();
+      ti.setSourceSentence("<s> " + line + " </s>");
+      translations.add(ti);
+    }
+  }
+
+  /**
+   * Assigns the lines of the reference file, in order, to the sentences read so far. If the file
+   * has fewer lines than there are sentences, the remaining references stay unset.
+   */
+  private static void readReferencesFromPath(String path) throws IOException {
+    final Scanner scanner = new Scanner(new File(path), "UTF-8");
+    try {
+      for (TranslationInfo ti : translations) {
+        if (!scanner.hasNextLine()) {
+          break;
+        }
+        ti.setReference(scanner.nextLine());
+      }
+    } finally {
+      // Close the file even if reading fails part-way through.
+      scanner.close();
+    }
+  }
+
+  /**
+   * Reads an n-best file whose lines have "|||"-separated fields: the first field is the
+   * sentence index and the second is the tree. For every sentence, the first line carrying a
+   * new sentence index contributes its tree; later lines with the same index are skipped.
+   */
+  private static void readTranslationsFromPath(String path) throws IOException {
+    final Scanner scanner = new Scanner(new File(path), "UTF-8");
+    try {
+      String sentenceIndex = null;
+      for (TranslationInfo ti : translations) {
+        while (scanner.hasNextLine()) {
+          final String[] fields = scanner.nextLine().split("\\|\\|\\|");
+          if (fields.length < 2) {
+            // Blank or malformed line without a tree field: nothing to read from it.
+            continue;
+          }
+          final String index = fields[0];
+          final String tree = fields[1].trim();
+          if (!index.equals(sentenceIndex)) {
+            sentenceIndex = index;
+            ti.translations().add(new Tree(tree));
+            break;
+          }
+        }
+      }
+    } finally {
+      // Close the file even if a tree fails to parse.
+      scanner.close();
+    }
+  }
+
+  /**
+   * Initializes the various JComponents in the chooser frame.
+   */
+  private static void initializeChooserFrame() {
+    JFrame chooserFrame = new JFrame("Joshua Derivation Tree Browser");
+    chooserFrame.setLayout(new BorderLayout());
+
+    /*
+     * JMenuBar mb = new JMenuBar(); JMenu openMenu = new JMenu("Control"); JMenuItem src = new
+     * JMenuItem("Open source file ..."); JMenuItem ref = new JMenuItem("Open reference file ...");
+     * JMenuItem tgt = new JMenuItem("Open n-best derivations file ..."); JMenuItem quit = new
+     * JMenuItem("Quit");
+     * 
+     * new FileChoiceListener(chooserFrame, src, ref, tgt);
+     * 
+     * quit.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) {
+     * System.exit(0); } }); openMenu.add(src); openMenu.add(ref); openMenu.add(tgt);
+     * openMenu.add(quit); mb.add(openMenu); chooserFrame.setJMenuBar(mb);
+     */
+
+    searchBox = new JTextField("search");
+    searchBox.getDocument().addDocumentListener(new SearchListener());
+    searchBox.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        // Pressing enter looks for the next match after the current selection.
+        final int selectedIndex = oneBestList.getSelectedIndex();
+        Browser.search(selectedIndex < 0 ? 0 : selectedIndex + 1);
+      }
+    });
+    oneBestList = new JList(new DefaultListModel());
+    oneBestList.setFixedCellWidth(200);
+    oneBestList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
+    // oneBestList.setCellRenderer(new DerivationBrowserListCellRenderer());
+
+    oneBestList.addListSelectionListener(new ListSelectionListener() {
+      public void valueChanged(ListSelectionEvent e) {
+        final int selectedIndex = oneBestList.getSelectedIndex();
+        if (selectedIndex < 0 || selectedIndex >= translations.size()) {
+          // The selection was cleared (index -1): there is no sentence to draw.
+          return;
+        }
+        final TranslationInfo selected = translations.get(selectedIndex);
+        for (DerivationTreeFrame frame : activeFrame) {
+          frame.drawGraph(selected);
+        }
+      }
+    });
+    chooserFrame.getContentPane().add(searchBox, BorderLayout.NORTH);
+    chooserFrame.getContentPane().add(new JScrollPane(oneBestList), BorderLayout.CENTER);
+
+    refreshLists();
+    chooserFrame.setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+    chooserFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+    activeFrame = new ArrayList<DerivationTreeFrame>();
+    // The first sentence determines how many n-best files were loaded; with no sentences at
+    // all there is nothing to display, so no tree frames are opened.
+    final int numNBestFiles =
+        translations.isEmpty() ? 0 : translations.get(0).translations().size();
+    for (int i = 0; i < numNBestFiles; i++) {
+      activeFrame.add(new DerivationTreeFrame(i, oneBestList));
+    }
+    chooserFrame.setVisible(true);
+  }
+
+  /**
+   * Empties the list model and refills it with the reference translation of every sentence.
+   */
+  private static void refreshLists() {
+    DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+    // Clear the model itself; JList.removeAll() would only detach child components.
+    oneBestListModel.clear();
+    for (TranslationInfo ti : translations) {
+      oneBestListModel.addElement(ti.reference());
+    }
+  }
+
+  /**
+   * Selects the first list entry at or after fromIndex that contains the text of the search box.
+   * The search box turns white when a match is found and red when there is none.
+   */
+  private static void search(int fromIndex) {
+    final String query = searchBox.getText();
+    DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+    for (int i = fromIndex; i < oneBestListModel.getSize(); i++) {
+      String reference = (String) oneBestListModel.getElementAt(i);
+      // A sentence without a reference has a null entry, which can never match.
+      if (reference != null && reference.indexOf(query) != -1) {
+        // found the query
+        oneBestList.setSelectedIndex(i);
+        oneBestList.ensureIndexIsVisible(i);
+        searchBox.setBackground(Color.white);
+        return;
+      }
+    }
+    searchBox.setBackground(Color.red);
+  }
+
+  /**
+   * Re-runs the search, starting at the current selection, whenever the search text is edited.
+   */
+  private static class SearchListener implements DocumentListener {
+
+    public void insertUpdate(DocumentEvent e) {
+      final int selectedIndex = oneBestList.getSelectedIndex();
+      Browser.search(selectedIndex < 0 ? 0 : selectedIndex);
+    }
+
+    public void removeUpdate(DocumentEvent e) {
+      final String query = searchBox.getText();
+      if (!query.equals("")) {
+        // An emptied search box is left alone; any other deletion triggers a new search.
+        insertUpdate(e);
+      }
+    }
+
+    public void changedUpdate(DocumentEvent e) {
+      // Nothing to do for this notification.
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
new file mode 100644
index 0000000..56366a0
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.GridLayout;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+import javax.swing.JButton;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JList;
+import javax.swing.JPanel;
+
+import org.apache.joshua.ui.tree_visualizer.DerivationTree;
+import org.apache.joshua.ui.tree_visualizer.DerivationViewer;
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * A frame that displays a derivation tree.
+ * 
+ * @author jonny
+ * 
+ */
+class DerivationTreeFrame extends JFrame {
+  /**
+   * Serialization version identifier for this frame class.
+   */
+  private static final long serialVersionUID = -3173826443907629130L;
+
+  /**
+   * A button to move to the next source-side sentence in the file.
+   */
+  JButton nextSource;
+  /**
+   * A button to move to the previous source-side sentence in the file.
+   */
+  JButton previousSource;
+
+  /**
+   * A button to show or hide extra information about the derivation.
+   */
+  private JButton informationButton;
+
+  /**
+   * A panel holding the extra information about the derivation.
+   */
+  private JPanel informationPanel;
+
+  /**
+   * A label holding the current source sentence.
+   */
+  private JLabel sourceLabel;
+
+  /**
+   * A label holding the reference translation of the current source sentence.
+   */
+  private JLabel referenceLabel;
+
+  /**
+   * A label holding the one-best translation of the current source sentence.
+   */
+  private JLabel oneBestLabel;
+
+  /**
+   * A panel that holds the buttons, as well as labels to show which derivation
+   * is currently being displayed.
+   */
+  private JPanel controlPanel;
+  /**
+   * A panel used to display the derivation tree itself.
+   */
+  private JPanel viewPanel;
+
+  /**
+   * This component displays the derivation tree's JUNG graph.
+   */
+  private DerivationViewer dv;
+
+  /**
+   * Index to determine which data set (which n-best file) this frame brings its
+   * graphs from.
+   */
+  private final int dataSetIndex;
+
+  private static final int DEFAULT_WIDTH = 640;
+  private static final int DEFAULT_HEIGHT = 480;
+
+  /**
+   * Color to use to render target-side trees.
+   */
+  private Color targetColor;
+
+  /**
+   * The sentence list whose selection the navigation buttons of this frame move.
+   */
+  private JList mainList;
+
+  /**
+   * Creates and shows a frame for one data set.
+   *
+   * @param index the index of the data set (n-best file) this frame draws its trees from
+   * @param mainList the sentence list whose selection the navigation buttons move
+   */
+  public DerivationTreeFrame(int index, JList mainList) {
+    super("Joshua Derivation Tree");
+    this.mainList = mainList;
+    setLayout(new BorderLayout());
+    setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+    controlPanel = new JPanel(new BorderLayout());
+    informationPanel = new JPanel(new GridLayout(3, 1));
+
+    sourceLabel = new JLabel("source sentence");
+    referenceLabel = new JLabel("reference translation");
+    oneBestLabel = new JLabel("one best translation");
+
+    informationPanel.add(sourceLabel);
+    informationPanel.add(referenceLabel);
+    informationPanel.add(oneBestLabel);
+    // The extra information stays hidden until the information button is pressed.
+    informationPanel.setVisible(false);
+
+    controlPanel.add(informationPanel, BorderLayout.SOUTH);
+
+    initializeButtons();
+    layoutControl();
+
+    viewPanel = new JPanel(new BorderLayout());
+    // The viewer is created lazily by the first call to drawGraph().
+    dv = null;
+
+    dataSetIndex = index;
+    // Cycle through the available colors when there are more data sets than colors.
+    targetColor = Browser.dataSetColors[dataSetIndex % Browser.dataSetColors.length];
+
+    getContentPane().add(viewPanel, BorderLayout.CENTER);
+    getContentPane().add(controlPanel, BorderLayout.SOUTH);
+    // drawGraph();
+    setVisible(true);
+  }
+
+  /**
+   * Lays out the control buttons of this frame.
+   */
+  private void layoutControl() {
+    /*
+     * JPanel ctlLeft = new JPanel(new GridLayout(2, 1)); JPanel ctlCenter = new
+     * JPanel(new GridLayout(2, 1)); JPanel ctlRight = new JPanel(new
+     * GridLayout(2, 1));
+     * 
+     * controlPanel.add(ctlLeft, BorderLayout.WEST); controlPanel.add(ctlCenter,
+     * BorderLayout.CENTER); controlPanel.add(ctlRight, BorderLayout.EAST);
+     * 
+     * ctlLeft.add(previousSource); ctlRight.add(nextSource);
+     */
+
+    controlPanel.add(previousSource, BorderLayout.WEST);
+    controlPanel.add(nextSource, BorderLayout.EAST);
+    controlPanel.add(informationButton, BorderLayout.CENTER);
+  }
+
+  /**
+   * Initializes the control buttons of this frame.
+   */
+  private void initializeButtons() {
+    nextSource = new JButton(">");
+    previousSource = new JButton("<");
+    informationButton = new JButton("More Information");
+
+    nextSource.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        // No upper-bound check is needed: JList.setSelectedIndex does nothing when the
+        // index is past the end of the list.
+        int index = mainList.getSelectedIndex();
+        mainList.setSelectedIndex(index + 1);
+      }
+    });
+    previousSource.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        int index = mainList.getSelectedIndex();
+        if (index > 0) {
+          mainList.setSelectedIndex(index - 1);
+        }
+      }
+    });
+    informationButton.addActionListener(new ActionListener() {
+      public void actionPerformed(ActionEvent e) {
+        // Toggle the information panel and relabel the button to match.
+        JButton source = (JButton) e.getSource();
+        if (informationPanel.isVisible()) {
+          source.setText("More Information");
+          informationPanel.setVisible(false);
+        } else {
+          source.setText("Less Information");
+          informationPanel.setVisible(true);
+        }
+      }
+    });
+  }
+
+  /**
+   * Displays the derivation tree that this frame's data set holds for the given
+   * sentence, and updates the information labels. If the sentence has no
+   * translation in this data set, the tree view is simply left empty.
+   *
+   * @param ti the sentence whose translation should be drawn
+   */
+  public void drawGraph(TranslationInfo ti) {
+    viewPanel.removeAll();
+    String src = ti.sourceSentence();
+    String ref = ti.reference();
+
+    sourceLabel.setText(src);
+    referenceLabel.setText(ref);
+
+    if (dataSetIndex >= ti.translations().size()) {
+      // This data set has no tree for the sentence (for example, the n-best file
+      // ended early); show an empty view instead of failing on the lookup below.
+      oneBestLabel.setText("");
+      viewPanel.revalidate();
+      repaint();
+      getContentPane().repaint();
+      return;
+    }
+
+    Tree tgt = ti.translations().get(dataSetIndex);
+    oneBestLabel.setText(tgt.yield());
+
+    DerivationTree tree = new DerivationTree(tgt, src);
+    if (dv == null) {
+      dv = new DerivationViewer(tree, viewPanel.getSize(), targetColor,
+          DerivationViewer.AnchorType.ANCHOR_LEFTMOST_LEAF);
+    } else {
+      dv.setGraph(tree);
+    }
+    viewPanel.add(dv, BorderLayout.CENTER);
+    dv.revalidate();
+    repaint();
+    getContentPane().repaint();
+  }
+
+  /**
+   * Makes this frame unmodifiable, so that the tree it displays cannot be
+   * changed. In fact, all that happens is the title is updated and the
+   * navigation buttons are disabled. This method is intended to prevent the
+   * user from modifying the frame, not to prevent other code from modifying it.
+   */
+  public void disableNavigationButtons() {
+    setTitle(getTitle() + " (fixed)");
+    nextSource.setEnabled(false);
+    previousSource.setEnabled(false);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
new file mode 100644
index 0000000..e23a89d
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * Holds what is known about one input sentence: its source text, its reference
+ * translation, and the translation trees read for it.
+ */
+class TranslationInfo {
+  /** The source sentence, or null until it has been set. */
+  private String sourceSentence;
+
+  /** The reference translation, or null until it has been set. */
+  private String reference;
+
+  /** The translation trees added so far; callers append to this list directly. */
+  private final List<Tree> translations = new ArrayList<Tree>();
+
+  /**
+   * Creates an entry with no source sentence, no reference, and no translations.
+   */
+  public TranslationInfo() {
+  }
+
+  /**
+   * @return the source sentence, or null if none has been set
+   */
+  public String sourceSentence() {
+    return this.sourceSentence;
+  }
+
+  /**
+   * @param src the source sentence to store
+   */
+  public void setSourceSentence(String src) {
+    this.sourceSentence = src;
+  }
+
+  /**
+   * @return the reference translation, or null if none has been set
+   */
+  public String reference() {
+    return this.reference;
+  }
+
+  /**
+   * @param ref the reference translation to store
+   */
+  public void setReference(String ref) {
+    this.reference = ref;
+  }
+
+  /**
+   * @return the live, modifiable list of translation trees
+   */
+  public List<Tree> translations() {
+    return this.translations;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
new file mode 100644
index 0000000..662544b
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.tree;
+
+import java.util.Stack;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Comparator;
+
+/**
+ * A class to represent the target-side tree produced by decoding using Joshua
+ * with an SCFG.
+ * <p>
+ * When decoding with use_tree_nbest=true, instead of a flat text output like
+ * "i asked her a question", we get a Penn treebank format tree like
+ * "(ROOT (S (NP i) (VP (V asked) (NP her) (NP (DT a) (N question)))))".
+ * If we also set include_align_index=true, we include source-side alignments
+ * for each internal node of the tree.
+ * <p>
+ * So, if the source input sentence is "je lui ai pose un question", if we
+ * turn on both configuration options, we end up with a decorated tree like
+ * this:
+ * "(ROOT{0-6} (S{0-6} (NP{0-1} i) (VP{1-6} (V{2-4} asked) (NP{1-2} her)
+ * (NP{4-6} (DT{4-5} a) (N{5-6} question)))))".
+ * <p>
+ * This class contains all the information of that flat string representation:
+ * the tree structure, the output (English) words, and the alignments to a
+ * source sentence.
+ * <p>
+ * Using a Tree and the source sentence it was aligned to, we can create
+ * a DerivationTree object suitable for display. 
+ *
+ * @author Jonny Weese jonny@cs.jhu.edu
+ */
+public class Tree {
+
+  /**
+   * An array holding the label of each node of the tree, in depth-first order.
+   * The label of a node means the NT label assigned to an internal node, or
+   * the terminal symbol (English word) at a leaf.
+   */
+  private final String [] labels;
+
+  /**
+   * The number of children of each node of the tree, in depth-first order.
+   */
+  private final int [] numChildren;
+
+  /**
+   * The smallest source-side index that each node covers, in depth-first order.
+   * Note that we only have this information for internal nodes. For leaves,
+   * this value will always be -1.
+   */
+  private final int [] sourceStartIndices;
+
+  /**
+   * 1 + the largest source-side index that each node covers, in depth-first
+   * order. Note that we only have this information for internal nodes. For
+   * leaves, this value will always be -1.
+   */
+  private final int [] sourceEndIndices;
+
+  /**
+   * A pattern to match an aligned internal node and pull out its information.
+   * This pattern matches:
+   *
+   * 1) start-of-string
+   * 2) (
+   * 3) an arbitrary sequence of non-whitespace characters (at least 1)
+   * 4) {
+   * 5) a decimal number
+   * 6) -
+   * 7) a decimal number
+   * 8) }
+   * 9) end-of-string
+   *
+   * That is, it matches something like "(FOO{32-55}". The string and two 
+   * decimal numbers (parts 3, 5, and 7) are captured in groups.
+   */
+  private static final Pattern NONTERMINAL_PATTERN =
+      Pattern.compile("^\\((\\S+)\\{(\\d+)-(\\d+)\\}$");
+
+  /**
+   * Creates a Tree object from an input string in Penn treebank format with
+   * source alignment annotations. Leading and trailing whitespace is ignored.
+   * @param s an input string in Penn treebank format with source alignment annotations
+   * @throws IllegalArgumentException if the string is not a single well-formed tree
+   */
+  public Tree(String s) {
+    // Trim first: a leading blank would otherwise produce an empty first token.
+    // Every ")" is split off into a token of its own.
+    final String [] tokens = s.trim().replaceAll("\\)", " )").split("\\s+");
+    int numNodes = 0;
+    for (String t : tokens) {
+      // Every token other than a closing parenthesis introduces one node.
+      if (!t.equals(")")) {
+        numNodes++;
+      }
+    }
+    labels = new String[numNodes];
+    numChildren = new int[numNodes];
+    sourceStartIndices = new int[numNodes];
+    sourceEndIndices = new int[numNodes];
+    try {
+      initialize(tokens);
+    } catch (Exception e) {
+      // This will catch most formatting errors.
+      throw new IllegalArgumentException(
+          String.format("couldn't create tree from string: \"%s\"", s),
+          e);
+    }
+  }
+
+  /**
+   * Fills the node arrays from the token sequence. The stack holds the indices
+   * of the internal nodes that are still open.
+   */
+  private void initialize(String [] tokens) {
+    final Stack<Integer> stack = new Stack<Integer>();
+    int nodeIndex = 0;
+    for (String token : tokens) {
+      final Matcher matcher = NONTERMINAL_PATTERN.matcher(token);
+      if (matcher.matches()) {
+        // new non-terminal node
+        labels[nodeIndex] = matcher.group(1);
+        sourceStartIndices[nodeIndex] = Integer.parseInt(matcher.group(2));
+        sourceEndIndices[nodeIndex] = Integer.parseInt(matcher.group(3));
+        stack.push(nodeIndex);
+        nodeIndex++;
+      } else if (token.equals(")")) {
+        // finished a subtree
+        stack.pop();
+        if (stack.empty()) {
+          // The root has been closed.
+          break;
+        } else {
+          numChildren[stack.peek()]++;
+        }
+      } else {
+        // otherwise, it's a new leaf node
+        labels[nodeIndex] = token;
+        sourceStartIndices[nodeIndex] = -1;
+        sourceEndIndices[nodeIndex] = -1;
+        numChildren[stack.peek()]++;
+        nodeIndex++;
+      }
+    }
+    if (!stack.empty()) {
+      // Not enough close-parentheses at the end of the tree.
+      throw new IllegalArgumentException();
+    }
+    if (nodeIndex != labels.length) {
+      // Node tokens remain after the root was closed. Accepting them would leave
+      // unlabeled (null) entries in the node arrays.
+      throw new IllegalArgumentException("unexpected tokens after the end of the tree");
+    }
+  }
+
+  /**
+   * Return the number of nodes in this Tree.
+   * @return the number of nodes in this Tree
+   */
+  public int size() {
+    return labels.length;
+  }
+
+  /**
+   * Get the root Node of this Tree.
+   * @return the Node at the root of this Tree
+   */
+  public Node root() {
+    return new Node(0);
+  }
+
+  /**
+   * Collects the array indices of the children of the node at the given index.
+   * The first child directly follows its parent in depth-first order.
+   */
+  private List<Integer> childIndices(int index) {
+    List<Integer> result = new ArrayList<Integer>();
+    int remainingChildren = numChildren[index];
+    int childIndex = index + 1;
+    while (remainingChildren > 0) {
+      result.add(childIndex);
+      childIndex = nextSiblingIndex(childIndex);
+      remainingChildren--;
+    }
+    return result;
+  }
+
+  /**
+   * Returns the index just past the subtree rooted at the given index, found by
+   * skipping over each child's subtree in turn.
+   */
+  private int nextSiblingIndex(int index) {
+    int result = index + 1;
+    int remainingChildren = numChildren[index];
+    for (int i = 0; i < remainingChildren; i++) {
+      result = nextSiblingIndex(result);
+    }
+    return result;
+  }
+
+  /**
+   * Returns the labels of all nodes without children, in depth-first order,
+   * separated by single spaces.
+   * @return the space-separated labels of the childless nodes
+   */
+  public String yield() {
+    final StringBuilder result = new StringBuilder();
+    for (int i = 0; i < labels.length; i++) {
+      if (numChildren[i] == 0) {
+        if (result.length() > 0) {
+          result.append(' ');
+        }
+        result.append(labels[i]);
+      }
+    }
+    return result.toString();
+  }
+
+  @Override
+  public String toString() {
+    return root().toString();
+  }
+
+  /**
+   * A class representing the Nodes of a tree.
+   */
+  public class Node {
+
+    /**
+     * The index into the Tree class's internal arrays.
+     */
+    private final int index;
+
+    private Node(int i) {
+      index = i;
+    }
+
+    /**
+     * Get the label for this node. If the node is internal to the tree, its
+     * label is the non-terminal label assigned to it. If it is a leaf node,
+     * the label is the English word at the leaf.
+     * @return a string representing the label for this node
+     */
+    public String label() {
+      return labels[index];
+    }
+
+    /**
+     * @return true if this node has no children
+     */
+    public boolean isLeaf() {
+      return numChildren[index] == 0;
+    }
+
+    /**
+     * @return the smallest source-side index this node covers, or -1 for a leaf
+     */
+    public int sourceStartIndex() {
+      return sourceStartIndices[index];
+    }
+
+    /**
+     * @return 1 + the largest source-side index this node covers, or -1 for a leaf
+     */
+    public int sourceEndIndex() {
+      return sourceEndIndices[index];
+    }
+
+    /**
+     * @return a new list holding the children of this node, in left-to-right order
+     */
+    public List<Node> children() {
+      List<Node> result = new ArrayList<Node>();
+      for (int j : childIndices(index)) {
+        result.add(new Node(j));
+      }
+      return result;
+    }
+
+    /**
+     * Formats the subtree under this node in the same annotated bracket
+     * notation that the Tree constructor reads.
+     */
+    @Override
+    public String toString() {
+      if (isLeaf()) {
+        return label();
+      }
+      final StringBuilder result = new StringBuilder(String.format("(%s{%d-%d}",
+          label(),
+          sourceStartIndex(),
+          sourceEndIndex()));
+      for (Node c : children()) {
+        result.append(' ').append(c);
+      }
+      return result.append(')').toString();
+    }
+  }
+
+  /**
+   * Orders Nodes by ascending source start index.
+   */
+  public static class NodeSourceStartComparator implements Comparator<Node> {
+    public int compare(Node a, Node b) {
+      // Compare explicitly rather than subtracting, which could overflow.
+      final int x = a.sourceStartIndex();
+      final int y = b.sourceStartIndex();
+      return (x < y) ? -1 : ((x == y) ? 0 : 1);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java b/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
new file mode 100644
index 0000000..327c882
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+public final class Algorithms {
+
+  /**
+   * Calculates the Levenshtein (minimum edit) distance between two token sequences, where
+   * inserting, deleting, or substituting one token each costs 1.
+   * 
+   * The code is based on the example by Michael Gilleland found at
+   * http://www.merriampark.com/ld.htm.
+   * @param candidate the tokens of the candidate paraphrase
+   * @param source the tokens of the source
+   * @return the minimum edit distance.
+   */
+  public static final int levenshtein(String[] candidate, String[] source) {
+    // First check to see whether either of the arrays
+    // is empty, in which case the least cost is simply
+    // the length of the other array (which would correspond
+    // to inserting that many elements).
+    if (source.length == 0) {
+      return candidate.length;
+    }
+    if (candidate.length == 0) {
+      return source.length;
+    }
+
+    // Initialize a table of the minimum edit distances between
+    // any two prefixes of the arrays. The size of the table is set
+    // to be one beyond the lengths of the two arrays. Entry [i][0]
+    // is i and entry [0][j] is j: the cost of turning a prefix into
+    // the empty sequence, or the empty sequence into a prefix.
+    int distances[][] = new int[source.length + 1][candidate.length + 1];
+
+    for (int i = 0; i <= source.length; i++) {
+      distances[i][0] = i;
+    }
+    for (int j = 0; j <= candidate.length; j++) {
+      distances[0][j] = j;
+    }
+
+    // Walk through each item in the source and target arrays
+    // and find the minimum cost to move from the previous points
+    // to here.
+    for (int i = 1; i <= source.length; i++) {
+      String sourceItem = source[i - 1];
+      for (int j = 1; j <= candidate.length; j++) {
+        String targetItem = candidate[j - 1];
+        // Null-safe comparison: two null tokens count as equal, and a null token
+        // never equals a non-null one.
+        boolean same = (sourceItem == null) ? (targetItem == null) : sourceItem.equals(targetItem);
+        int cost = same ? 0 : 1;
+        int deletionCost = distances[i - 1][j] + 1;
+        int insertionCost = distances[i][j - 1] + 1;
+        int substitutionCost = distances[i - 1][j - 1] + cost;
+        distances[i][j] = minimum(insertionCost, deletionCost, substitutionCost);
+      }
+    }
+    // The point at the end will be the minimum edit distance.
+    return distances[source.length][candidate.length];
+  }
+
+  /**
+   * Returns the minimum of the three values.
+   */
+  private static final int minimum(int a, int b, int c) {
+    int minimum = a;
+    if (b < minimum) {
+      minimum = b;
+    }
+    if (c < minimum) {
+      minimum = c;
+    }
+    return minimum;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Bits.java b/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
new file mode 100644
index 0000000..b5294f6
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * Utility class for bit twiddling: packs two 16-bit values into an {@code int} or two 32-bit
+ * values into a {@code long}, and unpacks them again.
+ *
+ * @author Lane Schwartz
+ */
+public class Bits {
+
+  /**
+   * Encodes two shorts in an int.
+   *
+   * @param high value stored in the upper 16 bits of the result
+   * @param low value stored in the lower 16 bits of the result
+   * @return encoded int
+   */
+  public static int encodeAsInt(short high, short low) {
+    // Mask the low half so the sign extension of a negative short cannot leak into the high half.
+    return (high << 16) | (low & 0xFFFF);
+  }
+
+  /**
+   * Decodes the high 16 bits of an integer as a short.
+   *
+   * @param i Integer value to decode
+   * @return Short representation of the high 16 bits of the integer
+   */
+  public static short decodeHighBits(int i) {
+    return (short) (i >>> 16);
+  }
+
+  /**
+   * Decodes the low 16 bits of an integer as a short.
+   *
+   * @param i Integer value to decode
+   * @return Short representation of the low 16 bits of the integer
+   */
+  public static short decodeLowBits(int i) {
+    // The narrowing cast keeps exactly the low 16 bits.
+    return (short) i;
+  }
+
+  /**
+   * Encodes two integers in a long.
+   *
+   * @param high value stored in the upper 32 bits of the result
+   * @param low value stored in the lower 32 bits of the result
+   * @return encoded long
+   */
+  public static long encodeAsLong(int high, int low) {
+    // Widen before shifting; mask the low half so its sign extension stays out of the high half.
+    return ((long) high << 32) | (low & 0xFFFFFFFFL);
+  }
+
+  /**
+   * Decodes the high 32 bits of a long as an integer.
+   *
+   * @param l Long value to decode
+   * @return Integer representation of the high 32 bits of the long
+   */
+  public static int decodeHighBits(long l) {
+    return (int) (l >>> 32);
+  }
+
+  /**
+   * Decodes the low 32 bits of a long as an integer.
+   *
+   * @param l Long value to decode
+   * @return Integer representation of the low 32 bits of the long
+   */
+  public static int decodeLowBits(long l) {
+    // The narrowing cast keeps exactly the low 32 bits.
+    return (int) l;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java b/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
new file mode 100644
index 0000000..1cc82b5
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A special map that maps every key to one particular value.
+ * <p>
+ * Only the queries {@code containsKey}, {@code containsValue}, {@code get}, {@code isEmpty} and
+ * {@code values} are supported; every other {@link Map} operation throws
+ * {@link UnsupportedOperationException}.
+ *
+ * @author Lane Schwartz
+ * @see "Lopez (2008), footnote 9 on p73"
+ */
+public class BotMap<K, V> implements Map<K, V> {
+
+  /** Special value, which this map will return for every key. */
+  private final V value;
+
+  /**
+   * Constructs a special map that maps any key to a particular value.
+   *
+   * @param value Special value, which this map will return for every key.
+   */
+  public BotMap(V value) {
+    this.value = value;
+  }
+
+  /** Not supported. */
+  @Override
+  public void clear() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Reports every key as present.
+   *
+   * @param key ignored
+   * @return always {@code true}
+   */
+  @Override
+  public boolean containsKey(Object key) {
+    return true;
+  }
+
+  /**
+   * Tests whether the given object equals the single value held by this map.
+   *
+   * @param value candidate value; may be {@code null}
+   * @return {@code true} if the candidate equals this map's value (or both are {@code null})
+   */
+  @Override
+  public boolean containsValue(Object value) {
+    // Compare by equality rather than identity so equal-but-distinct objects are recognised.
+    return (value == null) ? this.value == null : value.equals(this.value);
+  }
+
+  /** Not supported. */
+  @Override
+  public Set<Map.Entry<K, V>> entrySet() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Returns this map's single value regardless of the key.
+   *
+   * @param key ignored
+   * @return the value supplied at construction
+   */
+  @Override
+  public V get(Object key) {
+    return value;
+  }
+
+  /**
+   * @return always {@code false}
+   */
+  @Override
+  public boolean isEmpty() {
+    return false;
+  }
+
+  /** Not supported. */
+  @Override
+  public Set<K> keySet() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public V put(K key, V value) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public void putAll(Map<? extends K, ? extends V> t) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public V remove(Object key) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public int size() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @return an immutable single-element collection holding this map's value
+   */
+  @Override
+  public Collection<V> values() {
+    return Collections.singleton(value);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Cache.java b/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
new file mode 100644
index 0000000..0d72f8a
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+// Imports
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * A least-recently-used cache. It is a thin extension of {@link LinkedHashMap}, created in
+ * access order, whose {@code removeEldestEntry} hook evicts the eldest entry once the map grows
+ * beyond the configured maximum capacity.
+ * <p>
+ * This class is quite useful for storing the results of computations that we would do many times
+ * over in the FeatureFunctions.
+ *
+ * @author Chris Callison-Burch
+ * @since 14 April 2005
+ */
+public class Cache<K, V> extends LinkedHashMap<K, V> {
+
+  private static final long serialVersionUID = 6073387072740892061L;
+
+  /** Logger for this class. */
+  private static final Logger LOG = LoggerFactory.getLogger(Cache.class);
+
+  /** Maximum cache size used when none is specified. */
+  public static final int DEFAULT_CAPACITY = 100000000;
+
+  /** Upper bound on the initial capacity requested from the backing map. */
+  public static final int INITIAL_CAPACITY = 1000000;
+
+  /** Load factor handed to the backing map. */
+  public static final float LOAD_FACTOR = 0.75f;
+
+  /** Ordering mode handed to the backing map; {@code true} selects access order. */
+  public static final boolean ACCESS_ORDER = true;
+
+  /** Maximum number of items that the cache can contain. */
+  int maxCapacity;
+
+  /**
+   * Creates a Cache with a set capacity.
+   *
+   * @param maxCapacity the maximum capacity of the cache.
+   */
+  public Cache(int maxCapacity) {
+    // Never pre-size the table beyond INITIAL_CAPACITY, however large the cache may grow.
+    super(Math.min(maxCapacity, INITIAL_CAPACITY), LOAD_FACTOR, ACCESS_ORDER);
+    this.maxCapacity = maxCapacity;
+  }
+
+  /**
+   * Creates a Cache with the DEFAULT_CAPACITY.
+   */
+  public Cache() {
+    this(DEFAULT_CAPACITY);
+  }
+
+  /** Looks up a key, logging the access at debug level first. */
+  @Override
+  public V get(Object key) {
+    LOG.debug("Cache get   key: {}", key);
+    return super.get(key);
+  }
+
+  /** Stores a mapping, logging the key at debug level first. */
+  @Override
+  public V put(K key, V value) {
+    LOG.debug("Cache put   key: {}", key);
+    return super.put(key, value);
+  }
+
+  /** Tests for a key and logs at debug level whether it was present. */
+  @Override
+  public boolean containsKey(Object key) {
+    final boolean present = super.containsKey(key);
+    LOG.debug(present ? "Cache has key: {}" : "Cache lacks key: {}", key);
+    return present;
+  }
+
+  /**
+   * Eviction hook of {@link LinkedHashMap}. Once the map holds more entries than the maximum
+   * capacity, the eldest entry is dropped, which bounds memory consumption by discarding stale
+   * entries.
+   *
+   * @param eldest the eldest entry
+   * @return true if the current size is greater than the maximum capacity
+   */
+  @Override
+  protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
+    if (size() <= maxCapacity) {
+      return false;
+    }
+    LOG.debug("Cache loses key: {}",  eldest.getKey());
+    return true;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java b/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
new file mode 100644
index 0000000..b22d2aa
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * CKY-based decoding makes extensive use of charts, which maintain information about spans (i, j)
+ * over the length-n input sentence, 0 &lt;= i &lt;= j &lt;= n. These charts are used for many things; for
+ * example, lattices use a chart to denote whether there is a path between nodes i and j, and what
+ * their costs is, and the decoder uses charts to record the partial application of rules (DotChart}) 
+ * and the existence of proved items ({@link org.apache.joshua.decoder.phrase.PhraseChart}).
+ * 
+ * The dummy way to implement a chart is to initialize a two-dimensional array; however, this wastes
+ * a lot of space, because the constraint (i &lt;= j) means that only half of this space can ever be
+ * used. This is especially a problem for lattices, where the sentence length (n) is the number of
+ * nodes in the lattice!
+ * 
+ * Fortunately, there is a smarter way, since there is a simple deterministic mapping between chart
+ * spans under a given maximum length. This class implements that in a generic way, introducing
+ * large savings in both space and time.
+ * 
+ * @author Matt Post post@cs.jhu.edu
+ */
+public class ChartSpan<Type> {
+  Object[] chart;
+  int max;
+
+  public ChartSpan(int w, Type defaultValue) {
+    //System.err.println(String.format("ChartSpan::ChartSpan(%d)", w));
+    this.max = w;
+
+    /* offset(max,max) is the last position in the array */
+    chart = new Object[offset(max,max) + 1];
+
+    /* Initialize all arcs to infinity, except self-loops, which have distance 0 */
+    for (int i = 0; i < chart.length; i++)
+      chart[i] = defaultValue;
+  }
+  
+  @SuppressWarnings("unchecked")
+  public Type get(int i, int j) {
+    return (Type) chart[offset(i, j)];
+  }
+
+  public void set(int i, int j, Type value) {
+    chart[offset(i, j)] = value;
+  }
+
+  /**
+   * This computes the offset into the one-dimensional array for a given span.
+   * 
+   * @param i source node in span
+   * @param j target node in span
+   * @return the offset
+   */
+  private int offset(int i, int j) {
+    if (i < 0 || j > max || i > j) {
+      throw new RuntimeException(String.format("Invalid span (%d,%d | %d)", i, j, max));
+    }
+    
+    return i * (max + 1) - i * (i + 1) / 2 + j;
+  }
+
+  /**
+   * Convenience function for setting the values along the diagonal.
+   * 
+   * @param value input Type for which to set values
+   */
+  public void setDiagonal(Type value) {
+    for (int i = 0; i <= max; i++)
+      set(i, i, value);
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java b/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
new file mode 100644
index 0000000..974b973
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
@@ -0,0 +1,738 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Java Command Line Parser
+ * <p>
+ * The current version supports string, integer, and boolean options.
+ * <p>
+ * Support is not included for options which take a list of values.
+ * 
+ * @author Lane O.B. Schwartz
+ */
+@SuppressWarnings("rawtypes")
+public class CommandLineParser {
+
+  private Map<Character, Option<Integer>> intShortForms;
+  private Map<String, Option<Integer>> intLongForms;
+
+  private Map<Character, Option<String>> stringShortForms;
+  private Map<String, Option<String>> stringLongForms;
+
+  private Map<Character, Option<Boolean>> booleanShortForms;
+  private Map<String, Option<Boolean>> booleanLongForms;
+
+  private List<Option> allOptions;
+
+  private final Set<String> localizedTrueStrings = new HashSet<String>();
+  private final Set<String> localizedFalseStrings = new HashSet<String>();
+
+  /**
+   * Creates a parser with no registered options that recognises "true"/"yes" and "false"/"no"
+   * as boolean option values.
+   */
+  public CommandLineParser() {
+    // Default spellings accepted for boolean values
+    localizedTrueStrings.add("true");
+    localizedTrueStrings.add("yes");
+    localizedFalseStrings.add("false");
+    localizedFalseStrings.add("no");
+
+    // Registry of every option, in registration order
+    allOptions = new LinkedList<Option>();
+
+    // Per-type lookup tables, keyed by short and by long form
+    booleanShortForms = new HashMap<Character, Option<Boolean>>();
+    booleanLongForms = new HashMap<String, Option<Boolean>>();
+
+    stringShortForms = new HashMap<Character, Option<String>>();
+    stringLongForms = new HashMap<String, Option<String>>();
+
+    intShortForms = new HashMap<Character, Option<Integer>>();
+    intLongForms = new HashMap<String, Option<Integer>>();
+  }
+
+  public CommandLineParser(Set<String> localizedTrueStrings, Set<String> localizedFalseStrings) {
+    this();
+
+    this.localizedTrueStrings.clear();
+    this.localizedFalseStrings.clear();
+
+    this.localizedTrueStrings.addAll(localizedTrueStrings);
+    this.localizedFalseStrings.addAll(localizedFalseStrings);
+  }
+
+  /**
+   * Registers an integer-valued option.
+   * <p>
+   * The option is stored under both its short and its long form, even when a form is the
+   * "missing" placeholder, and is appended to the list of all options.
+   *
+   * @param shortForm single-character name, or {@code Option.MISSING_SHORT_FORM}
+   * @param longForm long name, or {@code Option.MISSING_LONG_FORM}
+   * @param valueVariable passed through to the {@code Option} constructor
+   * @param defaultValue passed through to the {@code Option} constructor
+   * @param legalValues passed through to the {@code Option} constructor
+   * @param comment passed through to the {@code Option} constructor
+   * @return the newly registered option
+   * @throws DuplicateOptionException if an integer option with the same (non-missing) short or
+   *         long form is already registered
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+      Integer defaultValue, Set<Integer> legalValues, String comment) {
+    final boolean shortFormTaken =
+        shortForm != Option.MISSING_SHORT_FORM && intShortForms.containsKey(shortForm);
+    if (shortFormTaken
+        || (!longForm.equals(Option.MISSING_LONG_FORM) && intLongForms.containsKey(longForm))) {
+      throw new DuplicateOptionException("Duplicate options are not allowed");
+    }
+
+    final Option<Integer> option =
+        new Option<Integer>(shortForm, longForm, valueVariable, defaultValue, legalValues, comment);
+    intShortForms.put(shortForm, option);
+    intLongForms.put(longForm, option);
+    allOptions.add(option);
+    return option;
+  }
+
+  /**
+   * Registers an integer option without a default value; delegates to the six-argument overload
+   * with {@code null} as the default.
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+      Set<Integer> legalValues, String comment) {
+    return addIntegerOption(shortForm, longForm, valueVariable, null, legalValues, comment);
+  }
+
+  /**
+   * Registers an integer option without a default value, using a fresh {@code UniversalSet} as
+   * the legal values (presumably "any value" -- UniversalSet is defined elsewhere).
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+      String comment) {
+    return addIntegerOption(shortForm, longForm, valueVariable, null, new UniversalSet<Integer>(),
+        comment);
+  }
+
+  /**
+   * Registers an integer option with {@code null} value variable and default, and a fresh
+   * {@code UniversalSet} as the legal values.
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm, String comment) {
+    return addIntegerOption(shortForm, longForm, null, null, new UniversalSet<Integer>(), comment);
+  }
+
+  /**
+   * Registers an integer option with the given default value and a fresh {@code UniversalSet}
+   * as the legal values.
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+      Integer defaultValue, String comment) {
+    return addIntegerOption(shortForm, longForm, valueVariable, defaultValue,
+        new UniversalSet<Integer>(), comment);
+  }
+
+  /**
+   * Registers a long-form-only integer option ({@code Option.MISSING_SHORT_FORM} is used as the
+   * short form) with the given default value.
+   */
+  public Option<Integer> addIntegerOption(String longForm, String valueVariable,
+      Integer defaultValue, String comment) {
+    return addIntegerOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+        new UniversalSet<Integer>(), comment);
+  }
+
+  /**
+   * Registers an integer option with {@code null} value variable and default, and an empty
+   * comment.
+   */
+  public Option<Integer> addIntegerOption(char shortForm, String longForm) {
+    return addIntegerOption(shortForm, longForm, null, null, new UniversalSet<Integer>(), "");
+  }
+
+  /** Registers a short-form-only integer option ({@code Option.MISSING_LONG_FORM} is used). */
+  public Option<Integer> addIntegerOption(char shortForm) {
+    return addIntegerOption(shortForm, Option.MISSING_LONG_FORM);
+  }
+
+  /** Registers a long-form-only integer option ({@code Option.MISSING_SHORT_FORM} is used). */
+  public Option<Integer> addIntegerOption(String longForm) {
+    return addIntegerOption(Option.MISSING_SHORT_FORM, longForm);
+  }
+
+  /** Registers a long-form-only integer option with the given comment. */
+  public Option<Integer> addIntegerOption(String longForm, String comment) {
+    return addIntegerOption(Option.MISSING_SHORT_FORM, longForm, comment);
+  }
+
+
+  // String options
+
+
+  /**
+   * Registers a string-valued option.
+   * <p>
+   * The option is stored under both its short and its long form, even when a form is the
+   * "missing" placeholder, and is appended to the list of all options.
+   *
+   * @param shortForm single-character name, or {@code Option.MISSING_SHORT_FORM}
+   * @param longForm long name, or {@code Option.MISSING_LONG_FORM}
+   * @param valueVariable passed through to the {@code Option} constructor
+   * @param defaultValue passed through to the {@code Option} constructor
+   * @param legalValues passed through to the {@code Option} constructor
+   * @param comment passed through to the {@code Option} constructor
+   * @return the newly registered option
+   * @throws DuplicateOptionException if a string option, or an integer option, with the same
+   *         (non-missing) short or long form is already registered
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+      String defaultValue, Set<String> legalValues, String comment) {
+    // Duplicates must be looked up among the *string* options. The integer tables are still
+    // consulted as before: parseLongForm tries integer options first, so an integer option with
+    // the same name would make this string option unreachable.
+    if ((shortForm != Option.MISSING_SHORT_FORM
+            && (stringShortForms.containsKey(shortForm) || intShortForms.containsKey(shortForm)))
+        || (!longForm.equals(Option.MISSING_LONG_FORM)
+            && (stringLongForms.containsKey(longForm) || intLongForms.containsKey(longForm)))) {
+      throw new DuplicateOptionException("Duplicate options are not allowed");
+    }
+
+    Option<String> o =
+        new Option<String>(shortForm, longForm, valueVariable, defaultValue, legalValues, comment);
+    stringShortForms.put(shortForm, o);
+    stringLongForms.put(longForm, o);
+    allOptions.add(o);
+    return o;
+  }
+
+  /**
+   * Registers a string option without a default value; delegates to the six-argument overload
+   * with {@code null} as the default.
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+      Set<String> legalValues, String comment) {
+    return addStringOption(shortForm, longForm, valueVariable, null, legalValues, comment);
+  }
+
+  /**
+   * Registers a string option without a default value, using a fresh {@code UniversalSet} as the
+   * legal values (presumably "any value" -- UniversalSet is defined elsewhere).
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+      String comment) {
+    return addStringOption(shortForm, longForm, valueVariable, null, new UniversalSet<String>(),
+        comment);
+  }
+
+  /**
+   * Registers a long-form-only string option ({@code Option.MISSING_SHORT_FORM} is used as the
+   * short form) without a default value.
+   */
+  public Option<String> addStringOption(String longForm, String valueVariable, String comment) {
+    return addStringOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, null,
+        new UniversalSet<String>(), comment);
+  }
+
+  /**
+   * Registers a string option with {@code null} value variable and default, and a fresh
+   * {@code UniversalSet} as the legal values.
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String comment) {
+    return addStringOption(shortForm, longForm, null, null, new UniversalSet<String>(), comment);
+  }
+
+  /**
+   * Registers a string option with the given default value and a fresh {@code UniversalSet} as
+   * the legal values.
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+      String defaultValue, String comment) {
+    return addStringOption(shortForm, longForm, valueVariable, defaultValue,
+        new UniversalSet<String>(), comment);
+  }
+
+  /**
+   * Registers a long-form-only string option ({@code Option.MISSING_SHORT_FORM} is used as the
+   * short form) with the given default value.
+   */
+  public Option<String> addStringOption(String longForm, String valueVariable, String defaultValue,
+      String comment) {
+    return addStringOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+        new UniversalSet<String>(), comment);
+  }
+
+  /**
+   * Registers a string option with {@code null} value variable and default, and an empty
+   * comment.
+   */
+  public Option<String> addStringOption(char shortForm, String longForm) {
+    return addStringOption(shortForm, longForm, null, null, new UniversalSet<String>(), "");
+  }
+
+  /** Registers a short-form-only string option ({@code Option.MISSING_LONG_FORM} is used). */
+  public Option<String> addStringOption(char shortForm) {
+    return addStringOption(shortForm, Option.MISSING_LONG_FORM);
+  }
+
+  /** Registers a long-form-only string option ({@code Option.MISSING_SHORT_FORM} is used). */
+  public Option<String> addStringOption(String longForm) {
+    return addStringOption(Option.MISSING_SHORT_FORM, longForm);
+  }
+
+  /** Registers a long-form-only string option with the given comment. */
+  public Option<String> addStringOption(String longForm, String comment) {
+    return addStringOption(Option.MISSING_SHORT_FORM, longForm, comment);
+  }
+
+
+  // boolean options
+
+  /**
+   * Registers a boolean-valued option whose legal values are {@code true} and {@code false}.
+   * <p>
+   * The option is stored under both its short and its long form, even when a form is the
+   * "missing" placeholder, and is appended to the list of all options.
+   *
+   * @param shortForm single-character name, or {@code Option.MISSING_SHORT_FORM}
+   * @param longForm long name, or {@code Option.MISSING_LONG_FORM}
+   * @param valueVariable passed through to the {@code Option} constructor
+   * @param defaultValue passed through to the {@code Option} constructor
+   * @param comment passed through to the {@code Option} constructor
+   * @return the newly registered option
+   * @throws DuplicateOptionException if a boolean option with the same (non-missing) short or
+   *         long form is already registered
+   */
+  public Option<Boolean> addBooleanOption(char shortForm, String longForm, String valueVariable,
+      Boolean defaultValue, String comment) {
+    final boolean shortFormTaken =
+        shortForm != Option.MISSING_SHORT_FORM && booleanShortForms.containsKey(shortForm);
+    if (shortFormTaken
+        || (!longForm.equals(Option.MISSING_LONG_FORM) && booleanLongForms.containsKey(longForm))) {
+      throw new DuplicateOptionException("Duplicate options are not allowed");
+    }
+
+    // A boolean option may only ever hold one of these two values
+    final Set<Boolean> bothTruthValues = new HashSet<Boolean>();
+    bothTruthValues.add(true);
+    bothTruthValues.add(false);
+
+    final Option<Boolean> option =
+        new Option<Boolean>(shortForm, longForm, valueVariable, defaultValue, bothTruthValues,
+            comment);
+    booleanShortForms.put(shortForm, option);
+    booleanLongForms.put(longForm, option);
+    allOptions.add(option);
+    return option;
+  }
+
+  /**
+   * Registers a boolean option without a default value; delegates to the five-argument overload
+   * with {@code null} as the default.
+   */
+  public Option<Boolean> addBooleanOption(char shortForm, String longForm, String valueVariable,
+      String comment) {
+    return addBooleanOption(shortForm, longForm, valueVariable, null, comment);
+  }
+
+  /** Registers a boolean option with {@code null} value variable and default. */
+  public Option<Boolean> addBooleanOption(char shortForm, String longForm, String comment) {
+    return addBooleanOption(shortForm, longForm, null, null, comment);
+  }
+
+  /**
+   * Registers a long-form-only boolean option ({@code Option.MISSING_SHORT_FORM} is used as the
+   * short form) with the given default value and a {@code null} value variable.
+   */
+  public Option<Boolean> addBooleanOption(String longForm, Boolean defaultValue, String comment) {
+    return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, null, defaultValue, comment);
+  }
+
+  /**
+   * Registers a long-form-only boolean option ({@code Option.MISSING_SHORT_FORM} is used as the
+   * short form) with the given value variable and default value.
+   */
+  public Option<Boolean> addBooleanOption(String longForm, String valueVariable,
+      Boolean defaultValue, String comment) {
+    return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+        comment);
+  }
+
+  /**
+   * Registers a boolean option with {@code null} value variable and default, and an empty
+   * comment.
+   */
+  public Option<Boolean> addBooleanOption(char shortForm, String longForm) {
+    return addBooleanOption(shortForm, longForm, null, null, "");
+  }
+
+  /** Registers a short-form-only boolean option ({@code Option.MISSING_LONG_FORM} is used). */
+  public Option<Boolean> addBooleanOption(char shortForm) {
+    return addBooleanOption(shortForm, Option.MISSING_LONG_FORM);
+  }
+
+  /** Registers a long-form-only boolean option ({@code Option.MISSING_SHORT_FORM} is used). */
+  public Option<Boolean> addBooleanOption(String longForm) {
+    return addBooleanOption(Option.MISSING_SHORT_FORM, longForm);
+  }
+
+  /** Registers a long-form-only boolean option with the given comment. */
+  public Option<Boolean> addBooleanOption(String longForm, String comment) {
+    return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, comment);
+  }
+
+
+
+  // float options
+
+
+
+  // /
+  /*
+   * public Option<Integer> addIntegerOption(char shortForm, String longForm) { if
+   * (intShortForms.containsKey(shortForm) || intLongForms.containsKey(longForm)) throw new
+   * DuplicateOptionException("Duplicate options are not allowed");
+   * 
+   * Option<Integer> o = new Option<Integer>(shortForm, longForm); intShortForms.put(shortForm, o);
+   * intLongForms.put(longForm, o); allOptions.add(o);
+   * 
+   * return o; }
+   * 
+   * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+   * int defaultValue, Set<Integer> legalValues, String comment) { if
+   * (intShortForms.containsKey(shortForm) || intLongForms.containsKey(longForm)) throw new
+   * DuplicateOptionException("Duplicate options are not allowed");
+   * 
+   * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, defaultValue,
+   * comment); intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o);
+   * return o; }
+   * 
+   * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+   * int defaultValue, String comment) { if (intShortForms.containsKey(shortForm) ||
+   * intLongForms.containsKey(longForm)) throw new
+   * DuplicateOptionException("Duplicate options are not allowed");
+   * 
+   * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, defaultValue,
+   * comment); intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o);
+   * return o; }
+   * 
+   * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+   * String comment) { if (intShortForms.containsKey(shortForm) ||
+   * intLongForms.containsKey(longForm)) throw new
+   * DuplicateOptionException("Duplicate options are not allowed");
+   * 
+   * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, comment);
+   * intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o); return o; }
+   */
+
+  /*
+   * public Option<String> addStringOption(char shortForm, String longForm) { if
+   * (stringShortForms.containsKey(shortForm) || stringLongForms.containsKey(longForm)) throw new
+   * DuplicateOptionException("Duplicate options are not allowed");
+   * 
+   * Option<String> o = new Option<String>(shortForm, longForm); stringShortForms.put(shortForm, o);
+   * stringLongForms.put(longForm, o); allOptions.add(o); return o; }
+   */
+
+  /**
+   * Parses command-line arguments and stores the values on the registered options.
+   * <p>
+   * Recognised shapes are {@code --name=value}, {@code --name value}, {@code --name} (boolean
+   * flag), {@code -c value} and {@code -c} (boolean flag). The token following an option is
+   * treated as another option, not as a value, when it starts with "-" and is not exactly "-"
+   * or "--". Arguments that do not start with "-" and are not consumed as a value are skipped.
+   * <p>
+   * After all arguments are processed, if any option reporting {@code isRequired()} was not seen,
+   * {@link #die(String)} is called, which prints the usage and exits the JVM.
+   *
+   * @param argv the command-line arguments
+   * @throws CommandLineParserException if an argument starts with "--=" or if a single-dash
+   *         argument is not followed by exactly one character
+   */
+  public void parse(String[] argv) {
+
+    // Options that were actually seen on the command line; used for the required-option check
+    Collection<Option> parsedOptions = new HashSet<Option>();
+
+    int index = 0;
+
+    while (index < argv.length) {
+      if (argv[index].startsWith("--")) {
+        int splitPoint = argv[index].indexOf('=');
+        if (splitPoint == 2) {
+          // "--=..." has an empty option name
+          throw new CommandLineParserException("Invalid option: --");
+        } else if (splitPoint >= 0) {
+          // --name=value
+          String option = argv[index].substring(2, splitPoint);
+          String value = argv[index].substring(splitPoint + 1);
+          parsedOptions.add(parseLongForm(option, value));
+        } else if (index + 1 < argv.length) {
+          String option = argv[index].substring(2);
+          String value = argv[index + 1];
+          if (value.startsWith("-") && !value.equals("-") && !value.equals("--")) {
+            // Next token is itself an option, so this one is a boolean flag
+            parsedOptions.add(parseLongForm(option));
+          } else {
+            parsedOptions.add(parseLongForm(option, value));
+            // Skip the value that was just consumed
+            index++;
+          }
+        } else {
+          // Must be a boolean option
+          String option = argv[index].substring(2);
+          parsedOptions.add(parseLongForm(option));
+          // throw new CommandLineParserException("No value provided for option " +
+          // argv[index].substring(2));
+        }
+      } else if (argv[index].startsWith("-")) {
+        String option = argv[index].substring(1);
+        if (option.length() == 1) {
+          if (index + 1 < argv.length) {
+            String value = argv[index + 1];
+            if (value.startsWith("-") && !value.equals("-") && !value.equals("--")) {
+              // Must be a boolean option
+              parsedOptions.add(parseShortForm(option.charAt(0)));
+            } else {
+              parsedOptions.add(parseShortForm(option.charAt(0), value));
+              // Skip the value that was just consumed
+              index++;
+            }
+          } else {
+            // Must be a boolean option
+            parsedOptions.add(parseShortForm(option.charAt(0)));
+          }
+        } else {
+          throw new CommandLineParserException(argv[index] + " is not a valid option");
+        }
+      }
+      index++;
+    }
+
+    // Every required option must have appeared on the command line
+    for (Option o : allOptions) {
+      if (o.isRequired() && !parsedOptions.contains(o)) {
+        die("A required option was not provided:\n " + o + "\n");
+      }
+    }
+
+  }
+
+  /**
+   * Writes a usage summary to standard error: the header line "Usage:" followed by one line per
+   * entry of {@code allOptions}, each rendered through its string representation.
+   */
+  public void printUsage() {
+    System.err.println("Usage:");
+    for (Option registered : allOptions) {
+      System.err.println(String.valueOf(registered));
+    }
+  }
+
+  private void die(String error) {
+    System.err.println(error);
+    printUsage();
+    System.exit(1);
+  }
+
+  public Option parseLongForm(String key, String value) {
+
+    if (intLongForms.containsKey(key)) {
+      try {
+        Option<Integer> o = intLongForms.get(key);
+        o.setValue(Integer.valueOf(value));
+        return o;
+      } catch (NumberFormatException e) {
+        die("Option " + key + " requires an integer value.");
+        return null;
+      }
+    } else if (stringLongForms.containsKey(key)) {
+      Option<String> o = stringLongForms.get(key);
+      o.setValue(value);
+      return o;
+    } else if (booleanLongForms.containsKey(key)) {
+      Option<Boolean> o = booleanLongForms.get(key);
+
+      if (localizedTrueStrings.contains(value.toLowerCase())) {
+        o.setValue(true);
+      } else if (localizedFalseStrings.contains(value.toLowerCase())) {
+        o.setValue(false);
+      } else {
+        throw new CommandLineParserException("Invalid value \"" + value + "\" for boolean option "
+            + key);
+      }
+
+      return o;
+    } else {
+
+      throw new Error("Bug in command line parser - unexpected option type encountered for option "
+          + key);
+    }
+  }
+
+  /**
+   * Handles a long-form option that was given without a value by switching the matching boolean
+   * option on.
+   *
+   * @param key the long-form option name
+   * @return the boolean option, now set to {@code true}
+   * @throws CommandLineParserException if no boolean option is registered under the key
+   */
+  public Option parseLongForm(String key) {
+
+    if (!booleanLongForms.containsKey(key)) {
+      throw new CommandLineParserException("No such boolean option exists: --" + key);
+    }
+
+    final Option<Boolean> flag = booleanLongForms.get(key);
+    flag.setValue(true);
+    return flag;
+  }
+
+  /**
+   * Handles a short-form option that was given without a value by switching the matching boolean
+   * option on.
+   *
+   * @param key the single-character option name
+   * @return the boolean option, now set to {@code true}
+   * @throws CommandLineParserException if no boolean option is registered under the key
+   */
+  public Option parseShortForm(Character key) {
+
+    if (!booleanShortForms.containsKey(key)) {
+      throw new CommandLineParserException("No such boolean option exists: -" + key);
+    }
+
+    final Option<Boolean> flag = booleanShortForms.get(key);
+    flag.setValue(true);
+    return flag;
+  }
+
+  public Option parseShortForm(Character key, String value) {
+    if (intShortForms.containsKey(key)) {
+      try {
+        Option<Integer> o = intShortForms.get(key);
+        o.setValue(Integer.valueOf(value));
+        return o;
+      } catch (NumberFormatException e) {
+        die("Option " + key + " requires an integer value.");
+        return null;
+      }
+    } else if (stringShortForms.containsKey(key)) {
+      Option<String> o = stringShortForms.get(key);
+      o.setValue(value);
+      return o;
+    } else if (booleanShortForms.containsKey(key)) {
+      Option<Boolean> o = booleanShortForms.get(key);
+
+      if (localizedTrueStrings.contains(value.toLowerCase())) {
+        o.setValue(true);
+      } else if (localizedFalseStrings.contains(value.toLowerCase())) {
+        o.setValue(false);
+      } else {
+        throw new CommandLineParserException("Invalid value \"" + value + "\" for boolean option "
+            + key);
+      }
+
+      return o;
+    } else {
+      throw new Error("Bug in command line parser - unexpected option type encountered");
+    }
+  }
+
+  /*
+   * public int intValue(Option o) { if (intOptions.containsKey(o)) return intOptions.get(o); else
+   * throw new RuntimeException("No such integer option"); }
+   * 
+   * public String stringValue(Option o) { if (stringOptions.containsKey(o)) return
+   * stringOptions.get(o); else throw new RuntimeException("No such string option"); }
+   */
+
+  /**
+   * Convenience accessor that asks the given option for its current value.
+   *
+   * @param <OptionType> the type of value carried by the option
+   * @param option the option to query
+   * @return whatever {@code option.getValue()} yields
+   */
+  public <OptionType> OptionType getValue(Option<OptionType> option) {
+    final OptionType resolved = option.getValue();
+    return resolved;
+  }
+
+  /**
+   * Convenience accessor that asks the given option whether it currently has a value.
+   *
+   * @param option the option to query
+   * @return the result of {@code option.hasValue()}
+   */
+  public boolean hasValue(Option<?> option) {
+    final boolean present = option.hasValue();
+    return present;
+  }
+
+  /**
+   * Minimal demonstration entry point: obtains an integer option ({@code -n} / {@code --number})
+   * from {@code addIntegerOption}, parses the supplied arguments and prints the option's value to
+   * standard output.
+   *
+   * @param args the command line arguments to parse
+   */
+  public static void main(String[] args) {
+    final CommandLineParser parser = new CommandLineParser();
+    final Option<Integer> number =
+        parser.addIntegerOption('n', "number", "NUMBER", "a number to be supplied");
+
+    parser.parse(args);
+
+    final Integer supplied = parser.getValue(number);
+    System.out.println(supplied);
+  }
+
+  /**
+   * Unchecked exception raised when command line arguments cannot be interpreted or when an
+   * option's value cannot be retrieved.
+   */
+  @SuppressWarnings("serial")
+  public static class CommandLineParserException extends RuntimeException {
+
+    /**
+     * Creates the exception with a description of the problem.
+     *
+     * @param message the detail message
+     */
+    public CommandLineParserException(final String message) {
+      super(message);
+    }
+  }
+
+  /**
+   * Unchecked exception type whose name indicates it signals an option being defined more than
+   * once. NOTE(review): no throw site is visible in this part of the file; confirm where it is
+   * raised.
+   */
+  @SuppressWarnings("serial")
+  public static class DuplicateOptionException extends RuntimeException {
+
+    /**
+     * Creates the exception with a description of the problem.
+     *
+     * @param message the detail message
+     */
+    public DuplicateOptionException(final String message) {
+      super(message);
+    }
+  }
+
+  /**
+   * Describes a single command line option: its short and long spellings, its help text, an
+   * optional default value, the set of legal values, and the value most recently assigned to it.
+   * <p>
+   * Two options are considered equal when both their short form and their long form match.
+   *
+   * @param <OptionType> the type of value the option carries
+   */
+  public class Option<OptionType> {
+    private final char shortForm;
+    private final String longForm;
+    private final String comment;
+    private final OptionType defaultValue;
+    private final String valueVariable;
+    private final Set<OptionType> legalValues;
+
+    /** Placeholder meaning "this option has no short form". */
+    public static final char MISSING_SHORT_FORM = '\u0000';
+
+    /** Placeholder meaning "this option has no long form". */
+    public static final String MISSING_LONG_FORM = "\u0000";
+
+    /** The explicitly assigned value, or {@code null} while none has been set. */
+    private OptionType optionValue;
+
+    /**
+     * Creates a fully specified option.
+     *
+     * @param shortForm single-character name, or {@link #MISSING_SHORT_FORM}
+     * @param longForm long name, or {@link #MISSING_LONG_FORM}; must not be {@code null}
+     * @param valueVariable placeholder shown after {@code =} in the usage line; may be
+     *        {@code null}
+     * @param defaultValue value reported when none was assigned; {@code null} makes the option
+     *        required
+     * @param legalValues the set of permitted values
+     * @param comment help text; must not be {@code null}
+     * @throws NullPointerException if {@code longForm} or {@code comment} is {@code null}
+     */
+    public Option(char shortForm, String longForm, String valueVariable, OptionType defaultValue,
+        Set<OptionType> legalValues, String comment) {
+
+      if (longForm == null) throw new NullPointerException("longForm must not be null");
+
+      if (comment == null) throw new NullPointerException("comment must not be null");
+
+      this.shortForm = shortForm;
+      this.longForm = longForm;
+      this.comment = comment;
+      this.valueVariable = valueVariable;
+      this.defaultValue = defaultValue;
+      this.legalValues = legalValues;
+      this.optionValue = null;
+    }
+
+    /** Creates an option with a restricted set of legal values and no default value. */
+    public Option(char shortForm, String longForm, String valueVariable,
+        Set<OptionType> legalValues, String comment) {
+      this(shortForm, longForm, valueVariable, null, legalValues, comment);
+    }
+
+
+    /** Creates an option with no default value that accepts any value. */
+    public Option(char shortForm, String longForm, String valueVariable, String comment) {
+      this(shortForm, longForm, valueVariable, null, new UniversalSet<OptionType>(), comment);
+    }
+
+    /** Creates an option with no value placeholder and no default value. */
+    public Option(char shortForm, String longForm, String comment) {
+      this(shortForm, longForm, null, null, new UniversalSet<OptionType>(), comment);
+    }
+
+    /** Creates an option with a default value that accepts any value. */
+    public Option(char shortForm, String longForm, String valueVariable, OptionType defaultValue,
+        String comment) {
+      this(shortForm, longForm, valueVariable, defaultValue, new UniversalSet<OptionType>(),
+          comment);
+    }
+
+    /** Creates a long-form-only option with a default value. */
+    public Option(String longForm, String valueVariable, OptionType defaultValue, String comment) {
+      this(MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+          new UniversalSet<OptionType>(), comment);
+    }
+
+    /** Creates an option with an empty help text, no placeholder and no default value. */
+    public Option(char shortForm, String longForm) {
+      this(shortForm, longForm, null, null, new UniversalSet<OptionType>(), "");
+    }
+
+    /** Creates a short-form-only option. */
+    public Option(char shortForm) {
+      this(shortForm, MISSING_LONG_FORM);
+    }
+
+    /** Creates a long-form-only option. */
+    public Option(String longForm) {
+      this(MISSING_SHORT_FORM, longForm);
+    }
+
+    /** Creates a long-form-only option with help text. */
+    public Option(String longForm, String comment) {
+      this(MISSING_SHORT_FORM, longForm, comment);
+    }
+
+    /**
+     * @return {@code true} if the option has a default value
+     */
+    public boolean isOptional() {
+      return (null != defaultValue);
+    }
+
+    /**
+     * @return {@code true} if the option has no default value
+     */
+    public boolean isRequired() {
+      return (null == defaultValue);
+    }
+
+    public char getShortForm() {
+      return shortForm;
+    }
+
+    public String getLongForm() {
+      return longForm;
+    }
+
+    public String getComment() {
+      return comment;
+    }
+
+    /** Records the explicitly supplied value. */
+    void setValue(OptionType value) {
+      this.optionValue = value;
+    }
+
+    /**
+     * Returns the explicitly assigned value if there is one, otherwise the default value.
+     *
+     * @return the effective value of this option
+     * @throws CommandLineParserException if neither an assigned nor a default value exists
+     */
+    OptionType getValue() {
+      if (optionValue != null) {
+        return optionValue;
+      } else if (defaultValue != null) {
+        return defaultValue;
+      } else {
+        throw new CommandLineParserException(
+            "Unable to get value because option has not been initialized and does not have a default value: "
+                + this.toString());
+      }
+    }
+
+    /**
+     * @return {@code true} if either an assigned value or a default value is available
+     */
+    boolean hasValue() {
+      return !(null == optionValue && null == defaultValue);
+    }
+
+    /**
+     * Renders the option as one line of usage text: short form, long form (with
+     * {@code =VARIABLE} when a value placeholder is set), the help text, and the legal values
+     * unless any value is allowed.
+     */
+    @Override
+    public String toString() {
+
+      String formattedShortForm;
+      if (shortForm == Option.MISSING_SHORT_FORM) {
+        formattedShortForm = "";
+      } else {
+        formattedShortForm = "-" + shortForm;
+      }
+
+      String formattedLongForm;
+      if (longForm.equals(Option.MISSING_LONG_FORM)) {
+        formattedLongForm = "";
+      } else {
+        formattedLongForm = "--" + longForm;
+      }
+
+      // A comma separates the two spellings only when both are present.
+      if (shortForm != Option.MISSING_SHORT_FORM && !longForm.equals(Option.MISSING_LONG_FORM)) {
+        formattedShortForm += ",";
+      }
+
+      if (valueVariable != null && valueVariable.length() >= 1) {
+        formattedLongForm += "=" + valueVariable;
+      }
+
+      String string = String.format(" %1$3s %2$-21s", formattedShortForm, formattedLongForm);
+
+      // The constructor rejects a null comment, so it can be appended unconditionally.
+      string += " " + comment;
+
+      if (!(legalValues instanceof UniversalSet)) {
+        string += " " + legalValues;
+      }
+
+      return string;
+    }
+
+    /**
+     * Compares options by short form and long form. The long forms are compared by content, so
+     * that equality agrees with {@link #hashCode()} even for distinct {@code String} instances.
+     */
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (!(o instanceof Option)) {
+        return false;
+      }
+      Option<?> other = (Option<?>) o;
+      return shortForm == other.shortForm && longForm.equals(other.longForm);
+    }
+
+    /** Hash of the short form concatenated with the long form, matching {@link #equals}. */
+    @Override
+    public int hashCode() {
+      return (shortForm + longForm).hashCode();
+    }
+  }
+
+  /**
+   * A read-only set that reports every object as a member.
+   * <p>
+   * Membership queries always succeed, the set is never empty, and its size is reported as
+   * {@link Integer#MAX_VALUE}. Because the elements cannot be enumerated, iteration and array
+   * conversion are unsupported, as is every mutating operation; all of them throw
+   * {@link UnsupportedOperationException} instead of handing {@code null} back to the caller.
+   *
+   * @param <E> the nominal element type
+   */
+  static class UniversalSet<E> implements Set<E> {
+
+    @Override
+    public boolean add(Object o) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean addAll(Collection c) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void clear() {
+      throw new UnsupportedOperationException();
+    }
+
+    /** Always {@code true}: every object belongs to the universal set. */
+    @Override
+    public boolean contains(Object o) {
+      return true;
+    }
+
+    /** Always {@code true}: every collection is a subset of the universal set. */
+    @Override
+    public boolean containsAll(Collection c) {
+      return true;
+    }
+
+    @Override
+    public boolean isEmpty() {
+      return false;
+    }
+
+    /**
+     * Unsupported: the members of a universal set cannot be enumerated.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public Iterator<E> iterator() {
+      throw new UnsupportedOperationException("a universal set cannot be iterated");
+    }
+
+    @Override
+    public boolean remove(Object o) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean removeAll(Collection c) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean retainAll(Collection c) {
+      throw new UnsupportedOperationException();
+    }
+
+    /** Reports the largest representable size. */
+    @Override
+    public int size() {
+      return Integer.MAX_VALUE;
+    }
+
+    /**
+     * Unsupported: the members of a universal set cannot be enumerated.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public Object[] toArray() {
+      throw new UnsupportedOperationException("a universal set cannot be copied into an array");
+    }
+
+    /**
+     * Unsupported: the members of a universal set cannot be enumerated.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public <T> T[] toArray(T[] a) {
+      throw new UnsupportedOperationException("a universal set cannot be copied into an array");
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Constants.java b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
new file mode 100644
index 0000000..3d4139d
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/***
+ * One day, all constants should be moved here (many are in Vocabulary).
+ * 
+ * @author Matt Post post@cs.jhu.edu
+ */
+
+public final class Constants {
+  public static String defaultNT = "[X]";
+
+  public static final String START_SYM = "<s>";
+  public static final String STOP_SYM = "</s>";
+  public static final String UNKNOWN_WORD = "<unk>";
+  
+  public static final String fieldDelimiter = "\\s\\|{3}\\s";
+  public static final String spaceSeparator = "\\s+";
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Counted.java b/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
new file mode 100644
index 0000000..9f719b3
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Comparator;
+
+/**
+ * Pairs an element with the count recorded for it; both are fixed at construction time.
+ * 
+ * @author Lane Schwartz
+ */
+public class Counted<E> implements Comparable<Counted<E>> {
+
+  /** The element that was counted. */
+  private final E element;
+
+  /** The count recorded for the element. */
+  private final int count;
+
+  /**
+   * Wraps an element together with its count.
+   * 
+   * @param element the element that was counted
+   * @param count the count recorded for the element
+   */
+  public Counted(E element, int count) {
+    this.count = count;
+    this.element = element;
+  }
+
+  /**
+   * Returns the count recorded for the wrapped element.
+   * 
+   * @return the count recorded for the wrapped element
+   */
+  public int getCount() {
+    return this.count;
+  }
+
+  /**
+   * Returns the wrapped element.
+   * 
+   * @return the wrapped element
+   */
+  public E getElement() {
+    return this.element;
+  }
+
+  /**
+   * Orders counted objects by ascending count; the elements themselves play no part in the
+   * comparison.
+   * 
+   * @param o the counted object to compare against
+   * @return a negative value, zero, or a positive value when this object's count is respectively
+   *         less than, equal to, or greater than the other object's count
+   */
+  @Override
+  public int compareTo(Counted<E> o) {
+    return Integer.compare(this.count, o.count);
+  }
+
+  /**
+   * Creates a comparator that orders counted objects by descending count, i.e. the reverse of
+   * {@link #compareTo(Counted)}.
+   * 
+   * @param <E> the type of element wrapped by the objects being compared
+   * @return a comparator placing larger counts before smaller ones
+   */
+  public static <E> Comparator<Counted<E>> getDescendingComparator() {
+    return new Comparator<Counted<E>>() {
+      @Override
+      public int compare(Counted<E> left, Counted<E> right) {
+        return Integer.compare(right.count, left.count);
+      }
+    };
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Counts.java b/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
new file mode 100644
index 0000000..89a9f38
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Maintains element co-occurrence data.
+ * <p>
+ * Pair counts are accumulated with {@link #incrementCount(Object, Object)}. Relative frequency
+ * estimates in both directions are computed on demand and memoized; every increment discards the
+ * memoized estimates, because a single increment changes the denominator shared by a whole row
+ * or column of estimates.
+ * <p>
+ * NOTE(review): the class offers {@code writeExternal} / {@code readExternal} methods but does
+ * not declare {@code java.io.Externalizable}, so they only run when called explicitly.
+ * 
+ * @author Lane Schwartz
+ * @author Chris Callison-Burch
+ */
+public class Counts<A, B> implements Iterable<Pair<A, B>> {
+
+  /**
+   * Stores the number of times instances of A and B co-occur.
+   */
+  private Map<A, Map<B, Integer>> counts;
+
+  /** Stores the number of times instances of B occur. */
+  private Map<B, Integer> bTotals;
+
+  /** Memoized relative frequency estimates for p(A | B); emptied on every increment. */
+  private Map<A, Map<B, Float>> probabilities;
+
+  /** Memoized relative frequency estimates for p(B | A); emptied on every increment. */
+  private Map<B, Map<A, Float>> reverseProbabilities;
+
+  /** Stores the value to return when an unseen pair is queried. */
+  private float floorProbability;
+
+  /**
+   * Constructs an initially empty co-occurrence counter, with floor probability set to
+   * <code>Float.MIN_VALUE</code>.
+   */
+  public Counts() {
+    this(Float.MIN_VALUE);
+  }
+
+  /**
+   * Constructs an initially empty co-occurrence counter.
+   * 
+   * @param floorProbability Floor probability to use when an unseen pair is queried.
+   */
+  public Counts(float floorProbability) {
+    this.floorProbability = floorProbability;
+    this.counts = new HashMap<A, Map<B, Integer>>();
+    this.bTotals = new HashMap<B, Integer>();
+    this.probabilities = new HashMap<A, Map<B, Float>>();
+    this.reverseProbabilities = new HashMap<B, Map<A, Float>>();
+  }
+
+
+  /**
+   * Increments the co-occurrence count of the provided objects, the total for <code>b</code>,
+   * and discards all memoized probability estimates.
+   * 
+   * @param a input object A
+   * @param b input object B
+   */
+  public void incrementCount(A a, B b) {
+    // Increment the pair count, creating the row for a on first use.
+    Map<B, Integer> bMap = counts.get(a);
+    if (bMap == null) {
+      bMap = new HashMap<B, Integer>();
+      counts.put(a, bMap);
+    }
+    Integer previousCount = bMap.get(b);
+    bMap.put(b, (previousCount == null) ? 1 : previousCount + 1);
+
+    // Increment the total for b.
+    Integer previousTotal = bTotals.get(b);
+    bTotals.put(b, (previousTotal == null) ? 1 : previousTotal + 1);
+
+    // Invalidate previously calculated probabilities. The increment changes the denominator of
+    // p(x | b) for every x and of p(y | a) for every y, so clearing only the entries stored under
+    // a and b would leave stale values behind; dropping both caches is the simple, safe choice.
+    probabilities.clear();
+    reverseProbabilities.clear();
+  }
+
+  /**
+   * Gets the co-occurrence count for the two elements.
+   * 
+   * @param a input object A
+   * @param b input object B
+   * @return the co-occurrence count for the two elements, or 0 if the pair was never counted
+   */
+  public int getCount(A a, B b) {
+    Map<B, Integer> bMap = counts.get(a);
+    if (bMap == null) {
+      return 0;
+    }
+
+    Integer count = bMap.get(b);
+    return (count == null) ? 0 : count;
+  }
+
+  /**
+   * Gets the total number of times the specified element has been seen.
+   * 
+   * @param b input object B
+   * @return the total number of times the specified element has been seen, or 0 if never
+   */
+  int getCount(B b) {
+    Integer total = bTotals.get(b);
+    return (total == null) ? 0 : total;
+  }
+
+  /**
+   * Gets the probability of a given b.
+   * <p>
+   * This value is the relative frequency estimate count(a, b) / count(b). The result is memoized
+   * until the next call to {@link #incrementCount(Object, Object)}.
+   * 
+   * @param a object A
+   * @param b object B
+   * @return the probability of a given b, or the floor probability if the pair is unseen
+   */
+  public float getProbability(A a, B b) {
+
+    int count = getCount(a, b);
+    int bCount = getCount(b);
+
+    if (count == 0 || bCount == 0) {
+      return floorProbability;
+    }
+
+    // Fetch (or create and register) the memo row for a, so that the value computed below is
+    // actually found again on the next query.
+    Map<B, Float> bMap = probabilities.get(a);
+    if (bMap == null) {
+      bMap = new HashMap<B, Float>();
+      probabilities.put(a, bMap);
+    }
+
+    Float value = bMap.get(b);
+    if (value == null) {
+      value = (float) count / (float) bCount;
+      bMap.put(b, value);
+    }
+
+    return value;
+  }
+
+  /**
+   * Gets the probability of b given a.
+   * <p>
+   * This value is the relative frequency estimate in the reverse direction: count(a, b) divided
+   * by the sum of all counts recorded for a. The result is memoized until the next call to
+   * {@link #incrementCount(Object, Object)}.
+   * 
+   * @param b object B
+   * @param a object A
+   * @return the probability of b given a, or the floor probability if the pair is unseen
+   */
+  public float getReverseProbability(B b, A a) {
+
+    int count = getCount(a, b);
+
+    if (count <= 0) {
+      return floorProbability;
+    }
+
+    Map<A, Float> aMap = reverseProbabilities.get(b);
+    if (aMap == null) {
+      aMap = new HashMap<A, Float>();
+      reverseProbabilities.put(b, aMap);
+    }
+
+    Float value = aMap.get(a);
+    if (value == null) {
+      // Sum the whole row for a; a long accumulator keeps the total from wrapping around.
+      long aCount = 0;
+      for (Integer aValue : counts.get(a).values()) {
+        aCount += aValue;
+      }
+
+      if (aCount <= 0) {
+        return floorProbability;
+      }
+
+      value = (float) count / (float) aCount;
+      aMap.put(a, value);
+    }
+
+    return value;
+
+  }
+
+  /**
+   * Gets the floor probability that is returned whenever an unseen pair is queried.
+   * 
+   * @return The floor probability that is returned whenever an unseen pair is queried
+   */
+  public float getFloorProbability() {
+    return this.floorProbability;
+  }
+
+  /**
+   * Writes the counts, totals, memoized estimates and floor probability to the given output, in
+   * that order. The output is not closed.
+   * 
+   * @param out destination to write to
+   * @throws IOException if writing fails
+   */
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeObject(counts);
+    out.writeObject(bTotals);
+    out.writeObject(probabilities);
+    out.writeObject(reverseProbabilities);
+    out.writeFloat(floorProbability);
+  }
+
+  /**
+   * Replaces this object's state with data read from the given input, in the order produced by
+   * {@link #writeExternal(ObjectOutput)}.
+   * <p>
+   * NOTE(review): this relies on Java object deserialization and should only be fed data from a
+   * trusted source.
+   * 
+   * @param in source to read from
+   * @throws ClassNotFoundException if a serialized class cannot be resolved
+   * @throws IOException if reading fails
+   */
+  @SuppressWarnings("unchecked")
+  public void readExternal(ObjectInput in) throws ClassNotFoundException, IOException {
+    this.counts = (HashMap<A, Map<B, Integer>>) in.readObject();
+    this.bTotals = (HashMap<B, Integer>) in.readObject();
+    this.probabilities = (HashMap<A, Map<B, Float>>) in.readObject();
+    this.reverseProbabilities = (HashMap<B, Map<A, Float>>) in.readObject();
+    this.floorProbability = in.readFloat();
+  }
+
+  /**
+   * Gets an iterator over all counted pairs.
+   * <p>
+   * The pairs are not guaranteed to be iterated over in any particular order. The iterator does
+   * not support removal.
+   * 
+   * @return an iterator over all counted pairs
+   */
+  @Override
+  public Iterator<Pair<A, B>> iterator() {
+
+    final Iterator<Entry<A, Map<B, Integer>>> aIterator = counts.entrySet().iterator();
+
+    return new Iterator<Pair<A, B>>() {
+
+      /** The row currently being walked. */
+      Entry<A, Map<B, Integer>> entry = null;
+
+      /** Position within the current row; null before the first row has been opened. */
+      Iterator<B> bIterator = null;
+
+      @Override
+      public boolean hasNext() {
+        return (bIterator != null && bIterator.hasNext()) || aIterator.hasNext();
+      }
+
+      @Override
+      public Pair<A, B> next() {
+        // Advance to the next row once the current one is exhausted.
+        if (bIterator == null || !bIterator.hasNext()) {
+          entry = aIterator.next();
+          bIterator = entry.getValue().keySet().iterator();
+        }
+
+        return new Pair<A, B>(entry.getKey(), bIterator.next());
+      }
+
+      @Override
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+
+    };
+  }
+
+}