You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:35 UTC
[24/60] [partial] incubator-joshua git commit: maven multi-module
layout 1st commit: moving files into joshua-core
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
new file mode 100644
index 0000000..ee22b94
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Scanner;
+
+import javax.swing.DefaultListModel;
+import javax.swing.JFrame;
+import javax.swing.JList;
+import javax.swing.JScrollPane;
+import javax.swing.JTextField;
+import javax.swing.ListSelectionModel;
+import javax.swing.event.DocumentEvent;
+import javax.swing.event.DocumentListener;
+import javax.swing.event.ListSelectionEvent;
+import javax.swing.event.ListSelectionListener;
+
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+import org.apache.joshua.util.io.LineReader;
+
+public class Browser {
+
+ /**
+ * A list with one entry per source sentence; each entry shows that sentence's reference translation.
+ */
+ private static JList oneBestList;
+
+ private static JTextField searchBox;
+
+ /**
+ * The frames that display derivation trees, one per n-best file.
+ */
+ private static List<DerivationTreeFrame> activeFrame;
+
+ private static List<TranslationInfo> translations;
+ /**
+ * Default width of the chooser frame.
+ */
+ private static final int DEFAULT_WIDTH = 640;
+
+ /**
+ * Default height of the chooser frame.
+ */
+ private static final int DEFAULT_HEIGHT = 480;
+
+ /**
+ * List of colors to be used in derivation trees
+ */
+ static final Color[] dataSetColors = { Color.red, Color.orange, Color.blue, Color.green };
+
+ /**
+  * Loads the source, reference and n-best files named on the command line and
+  * then opens the chooser frame.
+  *
+  * @param argv the path to the source file, the path to the reference file,
+  *        followed by zero or more paths to n-best derivation files
+  * @throws IOException if one of the input files cannot be read
+  */
+ public static void main(String[] argv) throws IOException {
+   // Source and reference paths are both required. Without this check a
+   // missing reference path reaches new File(null), which fails with an
+   // uninformative NullPointerException.
+   if (argv.length < 2) {
+     System.err.println("usage: Browser <source-file> <reference-file> [n-best-file ...]");
+     return;
+   }
+   final String sourcePath = argv[0];
+   final String referencePath = argv[1];
+   // Yields an empty array when no n-best files were given.
+   final String[] translationPaths = Arrays.copyOfRange(argv, 2, argv.length);
+
+   translations = new ArrayList<TranslationInfo>();
+   readSourcesFromPath(sourcePath);
+   readReferencesFromPath(referencePath);
+   for (String tp : translationPaths) {
+     readTranslationsFromPath(tp);
+   }
+   initializeChooserFrame();
+ }
+
+ private static void readSourcesFromPath(String path) throws IOException {
+ for (String line: new LineReader(path)) {
+ TranslationInfo ti = new TranslationInfo();
+ ti.setSourceSentence("<s> " + line + " </s>");
+ translations.add(ti);
+ }
+ }
+
+ private static void readReferencesFromPath(String path) throws IOException {
+ Scanner scanner = new Scanner(new File(path), "UTF-8");
+ for (TranslationInfo ti : translations) {
+ if (scanner.hasNextLine()) {
+ ti.setReference(scanner.nextLine());
+ }
+ }
+ scanner.close();
+ }
+
+ /**
+  * Reads an n-best file whose lines have the form "index ||| tree ||| ..." and
+  * adds, to each loaded sentence in turn, the tree from the first line whose
+  * index differs from the previously accepted one. Later lines that repeat
+  * the same index are skipped.
+  *
+  * @param path the path to the UTF-8 encoded n-best file
+  * @throws IOException if the file cannot be opened
+  */
+ private static void readTranslationsFromPath(String path) throws IOException {
+   final Scanner scanner = new Scanner(new File(path), "UTF-8");
+   try {
+     String sentenceIndex = null;
+     for (TranslationInfo ti : translations) {
+       while (scanner.hasNextLine()) {
+         final String[] fields = scanner.nextLine().split("\\|\\|\\|");
+         if (fields.length < 2) {
+           // Blank or malformed line: there is no tree field to read.
+           continue;
+         }
+         final String index = fields[0];
+         if (!index.equals(sentenceIndex)) {
+           sentenceIndex = index;
+           ti.translations().add(new Tree(fields[1].trim()));
+           break;
+         }
+       }
+     }
+   } finally {
+     // Release the file handle even if a tree fails to parse.
+     scanner.close();
+   }
+ }
+
+ /**
+  * Builds and shows the chooser frame: a search box above a scrollable list
+  * with one entry per sentence. Also creates one DerivationTreeFrame for each
+  * n-best data set attached to the first sentence.
+  */
+ private static void initializeChooserFrame() {
+   JFrame chooserFrame = new JFrame("Joshua Derivation Tree Browser");
+   chooserFrame.setLayout(new BorderLayout());
+
+   /*
+    * JMenuBar mb = new JMenuBar(); JMenu openMenu = new JMenu("Control"); JMenuItem src = new
+    * JMenuItem("Open source file ..."); JMenuItem ref = new JMenuItem("Open reference file ...");
+    * JMenuItem tgt = new JMenuItem("Open n-best derivations file ..."); JMenuItem quit = new
+    * JMenuItem("Quit");
+    *
+    * new FileChoiceListener(chooserFrame, src, ref, tgt);
+    *
+    * quit.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) {
+    * System.exit(0); } }); openMenu.add(src); openMenu.add(ref); openMenu.add(tgt);
+    * openMenu.add(quit); mb.add(openMenu); chooserFrame.setJMenuBar(mb);
+    */
+
+   // Created before any listener is registered so that a selection event can
+   // never observe a null list of frames.
+   activeFrame = new ArrayList<DerivationTreeFrame>();
+
+   searchBox = new JTextField("search");
+   searchBox.getDocument().addDocumentListener(new SearchListener());
+   searchBox.addActionListener(new ActionListener() {
+     public void actionPerformed(ActionEvent e) {
+       // Pressing Enter continues the search after the current selection.
+       final int selectedIndex = oneBestList.getSelectedIndex();
+       Browser.search(selectedIndex < 0 ? 0 : selectedIndex + 1);
+     }
+   });
+   oneBestList = new JList(new DefaultListModel());
+   oneBestList.setFixedCellWidth(200);
+   oneBestList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
+   // oneBestList.setCellRenderer(new DerivationBrowserListCellRenderer());
+
+   oneBestList.addListSelectionListener(new ListSelectionListener() {
+     public void valueChanged(ListSelectionEvent e) {
+       // getSelectedIndex() is -1 when the selection is empty; there is
+       // nothing to draw in that case.
+       final int selectedIndex = oneBestList.getSelectedIndex();
+       if (selectedIndex < 0 || selectedIndex >= translations.size()) {
+         return;
+       }
+       final TranslationInfo selected = translations.get(selectedIndex);
+       for (DerivationTreeFrame frame : activeFrame) {
+         frame.drawGraph(selected);
+       }
+     }
+   });
+   chooserFrame.getContentPane().add(searchBox, BorderLayout.NORTH);
+   chooserFrame.getContentPane().add(new JScrollPane(oneBestList), BorderLayout.CENTER);
+
+   refreshLists();
+   chooserFrame.setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+   chooserFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+   // An empty source file yields no sentences and therefore no data sets.
+   final int numNBestFiles =
+       translations.isEmpty() ? 0 : translations.get(0).translations().size();
+   for (int i = 0; i < numNBestFiles; i++) {
+     activeFrame.add(new DerivationTreeFrame(i, oneBestList));
+   }
+   chooserFrame.setVisible(true);
+ }
+
+ /**
+ * Removes and re-adds the appropriate values to the reference and one-best lists.
+ */
+ private static void refreshLists() {
+ oneBestList.removeAll();
+ DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+ for (TranslationInfo ti : translations) {
+ oneBestListModel.addElement(ti.reference());
+ }
+ return;
+ }
+
+ /**
+  * Selects the first list entry at or after fromIndex whose text contains the
+  * search box's current text and scrolls it into view. The search box turns
+  * white on a match and red when nothing matches.
+  *
+  * @param fromIndex the list index at which to start searching
+  */
+ private static void search(int fromIndex) {
+   final String query = searchBox.getText();
+   final DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+   for (int i = fromIndex; i < oneBestListModel.getSize(); i++) {
+     // An entry is null when the reference file has fewer lines than the
+     // source file; such an entry can never match.
+     final String reference = (String) oneBestListModel.getElementAt(i);
+     if (reference != null && reference.indexOf(query) != -1) {
+       // found the query
+       oneBestList.setSelectedIndex(i);
+       oneBestList.ensureIndexIsVisible(i);
+       searchBox.setBackground(Color.white);
+       return;
+     }
+   }
+   searchBox.setBackground(Color.red);
+ }
+
+ /**
+  * Re-runs the search, starting at the currently selected entry, whenever
+  * text is typed into or deleted from the search box.
+  */
+ private static class SearchListener implements DocumentListener {
+
+   public void insertUpdate(DocumentEvent e) {
+     final int start = Math.max(oneBestList.getSelectedIndex(), 0);
+     Browser.search(start);
+   }
+
+   public void removeUpdate(DocumentEvent e) {
+     // An emptied search box leaves the current selection untouched.
+     if (!searchBox.getText().equals("")) {
+       insertUpdate(e);
+     }
+   }
+
+   public void changedUpdate(DocumentEvent e) {
+     // Intentionally does nothing.
+   }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
new file mode 100644
index 0000000..56366a0
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.GridLayout;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+import javax.swing.JButton;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JList;
+import javax.swing.JPanel;
+
+import org.apache.joshua.ui.tree_visualizer.DerivationTree;
+import org.apache.joshua.ui.tree_visualizer.DerivationViewer;
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * A frame that displays a derivation tree.
+ *
+ * @author jonny
+ *
+ */
+class DerivationTreeFrame extends JFrame {
+ /**
+ * Eclipse seems to think serialVersionUID is important. I don't know why.
+ */
+ private static final long serialVersionUID = -3173826443907629130L;
+
+ /**
+ * A button to move to the next source-side sentence in the file.
+ */
+ JButton nextSource;
+ /**
+ * A button to move to the previous source-side sentence in the file.
+ */
+ JButton previousSource;
+
+ /**
+ * A button to show or hide extra information about the derivation.
+ */
+ private JButton informationButton;
+
+ /**
+ * A panel holding the extra information about the derivation.
+ */
+ private JPanel informationPanel;
+
+ /**
+ * A label holding the current source sentence.
+ */
+ private JLabel sourceLabel;
+
+ /**
+ * A label holding the reference translation of the current source sentence.
+ */
+ private JLabel referenceLabel;
+
+ /**
+ * A label holding the one-best translation of the current source sentence.
+ */
+ private JLabel oneBestLabel;
+
+ /**
+ * A panel that holds the buttons, as well as labels to show which derivation
+ * is currently being displayed.
+ */
+ private JPanel controlPanel;
+ /**
+ * A panel used to display the derivation tree itself.
+ */
+ private JPanel viewPanel;
+
+ /**
+ * This component displays the derivation tree's JUNG graph.
+ */
+ private DerivationViewer dv;
+
+ /**
+ * Index to determine which data set (which n-best file) this frame brings its
+ * graphs from.
+ */
+ private final int dataSetIndex;
+
+ private static final int DEFAULT_WIDTH = 640;
+ private static final int DEFAULT_HEIGHT = 480;
+
+ /**
+ * Color to use to render target-side trees.
+ */
+ private Color targetColor;
+
+ private JList mainList;
+
+ /**
+ * Builds the frame's panels, labels and buttons and makes the frame visible.
+ * No tree is drawn until drawGraph is called.
+ *
+ * @param index which data set (n-best file) this frame displays; also selects
+ * the tree color from Browser.dataSetColors, wrapping around when there are
+ * more data sets than colors
+ * @param mainList the list whose selected index the navigation buttons change
+ */
+ public DerivationTreeFrame(int index, JList mainList) {
+ super("Joshua Derivation Tree");
+ this.mainList = mainList;
+ setLayout(new BorderLayout());
+ setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+ controlPanel = new JPanel(new BorderLayout());
+ informationPanel = new JPanel(new GridLayout(3, 1));
+
+ sourceLabel = new JLabel("source sentence");
+ referenceLabel = new JLabel("reference translation");
+ oneBestLabel = new JLabel("one best translation");
+
+ informationPanel.add(sourceLabel);
+ informationPanel.add(referenceLabel);
+ informationPanel.add(oneBestLabel);
+ // Hidden until the user presses the information button.
+ informationPanel.setVisible(false);
+
+ controlPanel.add(informationPanel, BorderLayout.SOUTH);
+
+ // The buttons must exist before layoutControl() adds them to the panel.
+ initializeButtons();
+ layoutControl();
+
+ viewPanel = new JPanel(new BorderLayout());
+ // The viewer is created by the first call to drawGraph.
+ dv = null;
+
+ dataSetIndex = index;
+ targetColor = Browser.dataSetColors[dataSetIndex % Browser.dataSetColors.length];
+
+ getContentPane().add(viewPanel, BorderLayout.CENTER);
+ getContentPane().add(controlPanel, BorderLayout.SOUTH);
+ // drawGraph();
+ setVisible(true);
+ }
+
+ /**
+  * Places the previous/next buttons on either side of the information button
+  * inside the control panel.
+  */
+ private void layoutControl() {
+   /*
+    * JPanel ctlLeft = new JPanel(new GridLayout(2, 1)); JPanel ctlCenter = new
+    * JPanel(new GridLayout(2, 1)); JPanel ctlRight = new JPanel(new
+    * GridLayout(2, 1));
+    *
+    * controlPanel.add(ctlLeft, BorderLayout.WEST); controlPanel.add(ctlCenter,
+    * BorderLayout.CENTER); controlPanel.add(ctlRight, BorderLayout.EAST);
+    *
+    * ctlLeft.add(previousSource); ctlRight.add(nextSource);
+    */
+
+   controlPanel.add(previousSource, BorderLayout.WEST);
+   controlPanel.add(nextSource, BorderLayout.EAST);
+   controlPanel.add(informationButton, BorderLayout.CENTER);
+ }
+
+ /**
+  * Creates the three control buttons and wires their actions: the arrow
+  * buttons move the selection of the main list, and the information button
+  * toggles the information panel.
+  */
+ private void initializeButtons() {
+   nextSource = new JButton(">");
+   previousSource = new JButton("<");
+   informationButton = new JButton("More Information");
+
+   nextSource.addActionListener(new ActionListener() {
+     public void actionPerformed(ActionEvent e) {
+       mainList.setSelectedIndex(mainList.getSelectedIndex() + 1);
+     }
+   });
+   previousSource.addActionListener(new ActionListener() {
+     public void actionPerformed(ActionEvent e) {
+       final int current = mainList.getSelectedIndex();
+       if (current <= 0) {
+         // Already at the first entry, or nothing is selected.
+         return;
+       }
+       mainList.setSelectedIndex(current - 1);
+     }
+   });
+   informationButton.addActionListener(new ActionListener() {
+     public void actionPerformed(ActionEvent e) {
+       final JButton pressed = (JButton) e.getSource();
+       final boolean showDetails = !informationPanel.isVisible();
+       // The caption always names the action the next press will perform.
+       pressed.setText(showDetails ? "Less Information" : "More Information");
+       informationPanel.setVisible(showDetails);
+     }
+   });
+ }
+
+ /**
+  * Displays the derivation tree of the given sentence for this frame's data
+  * set and updates the source, reference and one-best labels.
+  *
+  * @param ti the sentence to display; if it has no translation for this
+  *        frame's data set, the source and reference labels are still updated
+  *        and the tree view is left empty
+  */
+ public void drawGraph(TranslationInfo ti) {
+   viewPanel.removeAll();
+   final String src = ti.sourceSentence();
+   sourceLabel.setText(src);
+   referenceLabel.setText(ti.reference());
+
+   // A sentence may hold fewer trees than there are data sets (for example
+   // when an n-best file ends early), so check before indexing.
+   if (dataSetIndex >= ti.translations().size()) {
+     oneBestLabel.setText("");
+     viewPanel.revalidate();
+     repaint();
+     return;
+   }
+   final Tree tgt = ti.translations().get(dataSetIndex);
+   oneBestLabel.setText(tgt.yield());
+
+   final DerivationTree tree = new DerivationTree(tgt, src);
+   if (dv == null) {
+     // First tree shown in this frame: create the viewer.
+     dv = new DerivationViewer(tree, viewPanel.getSize(), targetColor,
+         DerivationViewer.AnchorType.ANCHOR_LEFTMOST_LEAF);
+   } else {
+     dv.setGraph(tree);
+   }
+   // removeAll() above detached the viewer, so it is re-added on every call.
+   viewPanel.add(dv, BorderLayout.CENTER);
+   dv.revalidate();
+   repaint();
+   getContentPane().repaint();
+ }
+
+ /**
+  * Marks this frame as fixed so that the user can no longer change the tree
+  * it displays: " (fixed)" is appended to the title and both navigation
+  * buttons are disabled. This only restricts the user; other code can still
+  * modify the frame.
+  */
+ public void disableNavigationButtons() {
+   final String fixedTitle = getTitle() + " (fixed)";
+   setTitle(fixedTitle);
+   previousSource.setEnabled(false);
+   nextSource.setEnabled(false);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
new file mode 100644
index 0000000..e23a89d
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.browser;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * Holds what the browser knows about one sentence: its source text, its
+ * reference translation, and the translation trees added for it.
+ */
+class TranslationInfo {
+  private String sourceSentence;
+  private String reference;
+  private final List<Tree> translations = new ArrayList<Tree>();
+
+  public TranslationInfo() {
+  }
+
+  /** @return the source sentence, or null if none has been set */
+  public String sourceSentence() {
+    return this.sourceSentence;
+  }
+
+  public void setSourceSentence(String src) {
+    this.sourceSentence = src;
+  }
+
+  /** @return the reference translation, or null if none has been set */
+  public String reference() {
+    return this.reference;
+  }
+
+  public void setReference(String ref) {
+    this.reference = ref;
+  }
+
+  /** @return the live, modifiable list of translation trees */
+  public List<Tree> translations() {
+    return this.translations;
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
new file mode 100644
index 0000000..662544b
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.tree;
+
+import java.util.Stack;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Comparator;
+
+/**
+ * A class to represent the target-side tree produced by decoding using Joshua
+ * with an SCFG.
+ * <p>
+ * When decoding with use_tree_nbest=true, instead of a flat text output like
+ * "i asked her a question", we get a Penn treebank format tree like
+ * "(ROOT (S (NP i) (VP (V asked) (NP her) (NP (DT a) (N question)))))".
+ * If we also set include_align_index=true, we include source-side alignments
+ * for each internal node of the tree.
+ * <p>
+ * So, if the source input sentence is "je lui ai pose un question", if we
+ * turn on both configuration options, we end up with a decorated tree like
+ * this:
+ * "(ROOT{0-6} (S{0-6} (NP{0-1} i) (VP{1-6} (V{2-4} asked) (NP{1-2} her)
+ * (NP{4-6} (DT{4-5} a) (N{5-6} question)))))".
+ * <p>
+ * This class contains all the information of that flat string representation:
+ * the tree structure, the output (English) words, and the alignments to a
+ * source sentence.
+ * <p>
+ * Using a Tree and the source sentence it was aligned to, we can create
+ * a DerivationTree object suitable for display.
+ *
+ * @author Jonny Weese jonny@cs.jhu.edu
+ */
+public class Tree {
+
+ /**
+ * An array holding the label of each node of the tree, in depth-first order.
+ * The label of a node means the NT label assigned to an internal node, or
+ * the terminal symbol (English word) at a leaf.
+ */
+ private final String [] labels;
+
+ /**
+ * The number of children of each node of the tree, in depth-first order.
+ */
+ private final int [] numChildren;
+
+ /**
+ * The smallest source-side index that each node covers, in depth-first order.
+ * Note that we only have this information for internal nodes. For leaves,
+ * this value will always be -1.
+ */
+ private final int [] sourceStartIndices;
+
+ /**
+ * 1 + the largest source-side index that each node covers, in depth-first
+ * order. Note that we only have this information for internal nodes. For
+ * leaves, this value will always be -1.
+ */
+ private final int [] sourceEndIndices;
+
+ /**
+ * A pattern to match an aligned internal node and pull out its information.
+ * This pattern matches:
+ *
+ * 1) start-of-string
+ * 2) (
+ * 3) an arbitrary sequence of non-whitespace characters (at least 1)
+ * 4) {
+ * 5) a decimal number
+ * 6) -
+ * 7) a decimal number
+ * 8) }
+ * 9) end-of-string
+ *
+ * That is, it matches something like "(FOO{32-55}". The string and two
+ * decimal numbers (parts 3, 5, and 7) are captured in groups.
+ */
+ private static final Pattern NONTERMINAL_PATTERN =
+ Pattern.compile("^\\((\\S+)\\{(\\d+)-(\\d+)\\}$");
+
+ /**
+  * Creates a Tree object from an input string in Penn treebank format with
+  * source alignment annotations. Whitespace around the tree is ignored.
+  *
+  * @param s an input string in Penn treebank format with source alignment annotations
+  * @throws IllegalArgumentException if s cannot be parsed as such a tree
+  */
+ public Tree(String s) {
+   // Trim first: split() keeps a leading empty token when the input starts
+   // with whitespace, and that token would be mistaken for a leaf outside
+   // any node. Detaching each ")" makes it a token of its own.
+   final String [] tokens = s.trim().replaceAll("\\)", " )").split("\\s+");
+   // Every token except a close-parenthesis introduces exactly one node.
+   int numNodes = 0;
+   for (String t : tokens) {
+     if (!t.equals(")")) {
+       numNodes++;
+     }
+   }
+   labels = new String[numNodes];
+   numChildren = new int[numNodes];
+   sourceStartIndices = new int[numNodes];
+   sourceEndIndices = new int[numNodes];
+   try {
+     initialize(tokens);
+   } catch (Exception e) {
+     // This will catch most formatting errors.
+     throw new IllegalArgumentException(
+         String.format("couldn't create tree from string: \"%s\"", s),
+         e);
+   }
+ }
+
+ /**
+  * Fills the per-node arrays from the token stream. A stack of open internal
+  * nodes tracks which parent each new node belongs to.
+  *
+  * @param tokens the tree's tokens, with every ")" as a separate token
+  * @throws IllegalArgumentException if open nodes remain after the last token
+  */
+ private void initialize(String [] tokens) {
+   final Stack<Integer> open = new Stack<Integer>();
+   int next = 0;
+   for (String token : tokens) {
+     if (")".equals(token)) {
+       // finished a subtree
+       open.pop();
+       if (open.empty()) {
+         // The root has been closed; anything after it is ignored.
+         break;
+       }
+       numChildren[open.peek()]++;
+       continue;
+     }
+     final Matcher nonterminal = NONTERMINAL_PATTERN.matcher(token);
+     if (nonterminal.matches()) {
+       // new non-terminal node; it is counted as a child when it closes
+       labels[next] = nonterminal.group(1);
+       sourceStartIndices[next] = Integer.parseInt(nonterminal.group(2));
+       sourceEndIndices[next] = Integer.parseInt(nonterminal.group(3));
+       open.push(next);
+     } else {
+       // otherwise, it's a new leaf node, counted as a child immediately
+       labels[next] = token;
+       sourceStartIndices[next] = -1;
+       sourceEndIndices[next] = -1;
+       numChildren[open.peek()]++;
+     }
+     next++;
+   }
+   if (!open.empty()) {
+     // Not enough close-parentheses at the end of the tree.
+     throw new IllegalArgumentException();
+   }
+ }
+
+ /**
+ * Return the number of nodes in this Tree.
+ * @return the number of nodes in this Tree
+ */
+ public int size() {
+ return labels.length;
+ }
+
+ /**
+ * Get the root Node of this Tree.
+ * @return the Node at the root of this Tree
+ */
+ public Node root() {
+ return new Node(0);
+ }
+
+ private List<Integer> childIndices(int index) {
+ List<Integer> result = new ArrayList<Integer>();
+ int remainingChildren = numChildren[index];
+ int childIndex = index + 1;
+ while (remainingChildren > 0) {
+ result.add(childIndex);
+ childIndex = nextSiblingIndex(childIndex);
+ remainingChildren--;
+ }
+ return result;
+ }
+
+ /**
+  * Returns the array position just past the subtree rooted at the given
+  * node, which is where that node's next sibling (if any) is stored.
+  *
+  * @param index the array position of a node
+  * @return the first position after that node's whole subtree
+  */
+ private int nextSiblingIndex(int index) {
+   int position = index + 1;
+   // Number of subtree members still to be stepped over.
+   int pending = numChildren[index];
+   while (pending > 0) {
+     // Stepping over one node consumes it and queues up its own children.
+     pending += numChildren[position] - 1;
+     position++;
+   }
+   return position;
+ }
+
+ /**
+  * Returns the labels of all nodes without children, in depth-first order,
+  * joined by single spaces.
+  *
+  * @return the space-separated labels, or the empty string if there are none
+  */
+ public String yield() {
+   // A builder avoids re-copying the whole result for every appended word.
+   final StringBuilder result = new StringBuilder();
+   for (int i = 0; i < labels.length; i++) {
+     if (numChildren[i] == 0) {
+       if (result.length() > 0) {
+         result.append(' ');
+       }
+       result.append(labels[i]);
+     }
+   }
+   return result.toString();
+ }
+
+ @Override
+ public String toString() {
+ return root().toString();
+ }
+
+ /**
+  * A view of one node of the enclosing Tree, identified by its position in
+  * the Tree's depth-first arrays.
+  */
+ public class Node {
+
+   /**
+    * The position of this node in the enclosing Tree's internal arrays.
+    */
+   private final int index;
+
+   private Node(int i) {
+     this.index = i;
+   }
+
+   /**
+    * Get the label for this node: the non-terminal label for an internal
+    * node, or the English word for a leaf.
+    * @return a string representing the label for this node
+    */
+   public String label() {
+     return labels[index];
+   }
+
+   /**
+    * @return true if this node has no children
+    */
+   public boolean isLeaf() {
+     return numChildren[index] == 0;
+   }
+
+   /**
+    * @return the smallest source-side index this node covers, or -1 for a leaf
+    */
+   public int sourceStartIndex() {
+     return sourceStartIndices[index];
+   }
+
+   /**
+    * @return 1 + the largest source-side index this node covers, or -1 for a leaf
+    */
+   public int sourceEndIndex() {
+     return sourceEndIndices[index];
+   }
+
+   /**
+    * @return newly created Node views of this node's direct children, in order
+    */
+   public List<Node> children() {
+     final List<Integer> positions = childIndices(index);
+     final List<Node> nodes = new ArrayList<Node>();
+     for (int position : positions) {
+       nodes.add(new Node(position));
+     }
+     return nodes;
+   }
+
+   /**
+    * Renders the subtree rooted at this node in the same annotated
+    * Penn treebank format that the Tree constructor reads.
+    */
+   @Override
+   public String toString() {
+     if (isLeaf()) {
+       return label();
+     }
+     final StringBuilder text = new StringBuilder(String.format("(%s{%d-%d}",
+         label(),
+         sourceStartIndex(),
+         sourceEndIndex()));
+     for (Node child : children()) {
+       text.append(String.format(" %s", child));
+     }
+     return text.append(")").toString();
+   }
+ }
+
+ /**
+  * Orders Nodes by ascending source start index.
+  */
+ public static class NodeSourceStartComparator implements Comparator<Node> {
+   public int compare(Node a, Node b) {
+     // Compare explicitly instead of subtracting: a difference of two ints
+     // can overflow and report the wrong sign.
+     final int left = a.sourceStartIndex();
+     final int right = b.sourceStartIndex();
+     if (left < right) {
+       return -1;
+     }
+     return left == right ? 0 : 1;
+   }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java b/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
new file mode 100644
index 0000000..327c882
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Algorithms.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * Static helper algorithms.
+ */
+public final class Algorithms {
+
+  /**
+   * Calculates the Levenshtein (minimum edit) distance between two token
+   * sequences, where inserting, deleting or substituting one token costs 1.
+   * Two tokens match if they are equal according to equals, or both null.
+   *
+   * The code is based on the example by Michael Gilleland found at
+   * http://www.merriampark.com/ld.htm.
+   *
+   * @param candidate the tokens of the candidate paraphrase
+   * @param source the tokens of the source
+   * @return the minimum edit distance.
+   */
+  public static final int levenshtein(String[] candidate, String[] source) {
+    // First check to see whether either of the arrays
+    // is empty, in which case the least cost is simply
+    // the length of the other array (which would correspond
+    // to inserting that many elements).
+    if (source.length == 0) return candidate.length;
+    if (candidate.length == 0) return source.length;
+
+    // distances[i][j] is the minimum edit distance between the first i
+    // source tokens and the first j candidate tokens. The table is one
+    // larger than the arrays in each dimension so that row 0 and column 0
+    // can hold the cost of building a prefix from nothing (i and j
+    // respectively), which avoids bounds checks in the main loop.
+    final int[][] distances = new int[source.length + 1][candidate.length + 1];
+
+    for (int i = 0; i <= source.length; i++) {
+      distances[i][0] = i;
+    }
+    for (int j = 0; j <= candidate.length; j++) {
+      distances[0][j] = j;
+    }
+
+    // Walk through each item in the source and target arrays
+    // and find the minimum cost to move from the previous points
+    // to here.
+    for (int i = 1; i <= source.length; i++) {
+      final String sourceItem = source[i - 1];
+      for (int j = 1; j <= candidate.length; j++) {
+        final String targetItem = candidate[j - 1];
+        // Null-safe equality, so a null token cannot raise an exception.
+        final boolean same =
+            sourceItem == null ? targetItem == null : sourceItem.equals(targetItem);
+        final int cost = same ? 0 : 1;
+        final int deletionCost = distances[i - 1][j] + 1;
+        final int insertionCost = distances[i][j - 1] + 1;
+        final int substitutionCost = distances[i - 1][j - 1] + cost;
+        distances[i][j] = minimum(insertionCost, deletionCost, substitutionCost);
+      }
+    }
+    // The point at the end will be the minimum edit distance.
+    return distances[source.length][candidate.length];
+  }
+
+  /**
+   * Returns the minimum of the three values.
+   */
+  private static final int minimum(int a, int b, int c) {
+    return Math.min(a, Math.min(b, c));
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Bits.java b/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
new file mode 100644
index 0000000..b5294f6
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Bits.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * Utility class for bit twiddling: packs a pair of narrow integers into a single wider
+ * one and extracts the two halves again.
+ *
+ * @author Lane Schwartz
+ */
+public class Bits {
+
+  /**
+   * Encodes two shorts in an int.
+   *
+   * @param high input high short to encode; stored in bits 16-31 of the result
+   * @param low input low short to encode; stored in bits 0-15 of the result
+   * @return encoded int
+   */
+  public static int encodeAsInt(short high, short low) {
+    // Masking the low half keeps the sign extension of a negative value from
+    // overwriting the high 16 bits.
+    return (high << 16) | (low & 0x0000FFFF);
+  }
+
+  /**
+   * Decodes the high 16 bits of an integer as a short.
+   *
+   * @param i Integer value to decode
+   * @return Short representation of the high 16 bits of the integer
+   */
+  public static short decodeHighBits(int i) {
+    // The narrowing cast keeps exactly the 16 bits that were shifted down.
+    return (short) (i >> 16);
+  }
+
+  /**
+   * Decodes the low 16 bits of an integer as a short.
+   *
+   * @param i Integer value to decode
+   * @return Short representation of the low 16 bits of the integer
+   */
+  public static short decodeLowBits(int i) {
+    return (short) i;
+  }
+
+  /**
+   * Encodes two integers in a long.
+   *
+   * @param high input high int to encode; stored in bits 32-63 of the result
+   * @param low input low int to encode; stored in bits 0-31 of the result
+   * @return encoded long
+   */
+  public static long encodeAsLong(int high, int low) {
+    // Widen before shifting (an int shift by 32 would be a no-op) and mask the low
+    // half so that its sign extension cannot overwrite the high 32 bits.
+    return ((long) high << 32) | (low & 0x00000000FFFFFFFFL);
+  }
+
+  /**
+   * Decodes the high 32 bits of a long as an integer.
+   *
+   * @param l Long value to decode
+   * @return Integer representation of the high 32 bits of the long
+   */
+  public static int decodeHighBits(long l) {
+    // The narrowing cast keeps exactly the 32 bits that were shifted down.
+    return (int) (l >> 32);
+  }
+
+  /**
+   * Decodes the low 32 bits of a long as an integer.
+   *
+   * @param l Long value to decode
+   * @return Integer representation of the low 32 bits of the long
+   */
+  public static int decodeLowBits(long l) {
+    return (int) l;
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java b/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
new file mode 100644
index 0000000..1cc82b5
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/BotMap.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A special map that maps every key to one particular value.
+ * <p>
+ * The map is read-only and has no enumerable contents: all mutators, as well as
+ * {@link #size()}, {@link #keySet()} and {@link #entrySet()}, throw
+ * {@link UnsupportedOperationException}.
+ *
+ * @author Lane Schwartz
+ * @see "Lopez (2008), footnote 9 on p73"
+ */
+public class BotMap<K, V> implements Map<K, V> {
+
+  /** Special value, which this map will return for every key. */
+  private final V value;
+
+  /**
+   * Constructs a special map that maps any key to a particular value.
+   *
+   * @param value Special value, which this map will return for every key.
+   */
+  public BotMap(V value) {
+    this.value = value;
+  }
+
+  /** Not supported. */
+  @Override
+  public void clear() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @param key ignored
+   * @return always true, because every key is mapped
+   */
+  @Override
+  public boolean containsKey(Object key) {
+    return true;
+  }
+
+  /**
+   * Tests whether the given object equals the single value of this map.
+   *
+   * @param value object to compare against this map's value; may be null
+   * @return true if both are null or {@code equals} reports them equal
+   */
+  @Override
+  public boolean containsValue(Object value) {
+    // Map.containsValue is specified in terms of equals(), not reference identity.
+    return this.value == null ? value == null : this.value.equals(value);
+  }
+
+  /** Not supported. */
+  @Override
+  public Set<Map.Entry<K, V>> entrySet() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @param key ignored
+   * @return the single value of this map
+   */
+  @Override
+  public V get(Object key) {
+    return value;
+  }
+
+  /** @return always false */
+  @Override
+  public boolean isEmpty() {
+    return false;
+  }
+
+  /** Not supported. */
+  @Override
+  public Set<K> keySet() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public V put(K key, V value) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public void putAll(Map<? extends K, ? extends V> t) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public V remove(Object key) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Not supported. */
+  @Override
+  public int size() {
+    throw new UnsupportedOperationException();
+  }
+
+  /** @return an immutable collection holding only the single value of this map */
+  @Override
+  public Collection<V> values() {
+    return Collections.singleton(value);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Cache.java b/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
new file mode 100644
index 0000000..0d72f8a
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Cache.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+// Imports
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Cache is a class that implements a least recently used cache. It is a straightforward extension
+ * of java.util.LinkedHashMap with its removeEldestEntry method overridden, so that stale entries
+ * are deleted once we reach the specified capacity of the Cache.
+ * <p>
+ * This class is quite useful for storing the results of computations that we would do many times
+ * over in the FeatureFunctions.
+ * <p>
+ * Lookups, insertions and evictions are reported on the class logger at debug level.
+ *
+ * @author Chris Callison-Burch
+ * @since 14 April 2005
+ */
+public class Cache<K, V> extends LinkedHashMap<K, V> {
+
+  private static final long serialVersionUID = 6073387072740892061L;
+
+  /** Logger for this class. */
+  private static final Logger LOG = LoggerFactory.getLogger(Cache.class);
+
+  /** Maximum number of entries used when no capacity is specified. */
+  public static final int DEFAULT_CAPACITY = 100000000;
+
+  /** Upper bound on the initial capacity requested from the backing map. */
+  public static final int INITIAL_CAPACITY = 1000000;
+
+  /** Default load factor of the cache. */
+  public static final float LOAD_FACTOR = 0.75f;
+
+  /**
+   * By default, ordering mode of the cache is access order (true).
+   */
+  public static final boolean ACCESS_ORDER = true;
+
+  /** Maximum number of items that the cache can contain. */
+  int maxCapacity;
+
+  /**
+   * Creates a Cache with a set capacity.
+   *
+   * @param maxCapacity the maximum capacity of the cache.
+   */
+  public Cache(int maxCapacity) {
+    // Never pre-size the backing table beyond INITIAL_CAPACITY, however large the limit is.
+    super(Math.min(maxCapacity, INITIAL_CAPACITY), LOAD_FACTOR, ACCESS_ORDER);
+    this.maxCapacity = maxCapacity;
+  }
+
+  /**
+   * Creates a Cache with the DEFAULT_CAPACITY.
+   */
+  public Cache() {
+    this(DEFAULT_CAPACITY);
+  }
+
+  /**
+   * Looks up a key, logging the access.
+   *
+   * @param key the key to look up
+   * @return the cached value, or null if the superclass lookup returns null
+   */
+  @Override
+  public V get(Object key) {
+    LOG.debug("Cache get key: {}", key);
+    return super.get(key);
+  }
+
+  /**
+   * Stores a value under a key, logging the insertion.
+   *
+   * @param key the key to store under
+   * @param value the value to store
+   * @return whatever the superclass {@code put} returns for this key
+   */
+  @Override
+  public V put(K key, V value) {
+    LOG.debug("Cache put key: {}", key);
+    return super.put(key, value);
+  }
+
+  /**
+   * Tests for the presence of a key, logging the outcome.
+   *
+   * @param key the key to test
+   * @return true if the cache currently holds the key
+   */
+  @Override
+  public boolean containsKey(Object key) {
+    boolean contains = super.containsKey(key);
+    if (contains) {
+      LOG.debug("Cache has key: {}", key);
+    } else {
+      LOG.debug("Cache lacks key: {}", key);
+    }
+    return contains;
+  }
+
+  /**
+   * This method is invoked by put and putAll after inserting a new entry into the map. Once we
+   * reach the capacity of the cache, we remove the oldest entry each time a new entry is added.
+   * This reduces memory consumption by deleting stale entries.
+   *
+   * @param eldest the eldest entry
+   * @return true if the current size is greater than the maximum capacity
+   */
+  @Override
+  protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
+    boolean removing = size() > maxCapacity;
+    if (removing) {
+      LOG.debug("Cache loses key: {}", eldest.getKey());
+    }
+    return removing;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java b/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
new file mode 100644
index 0000000..b22d2aa
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/ChartSpan.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * CKY-based decoding makes extensive use of charts, which maintain information about spans (i, j)
+ * over the length-n input sentence, 0 <= i <= j <= n. These charts are used for many things; for
+ * example, lattices use a chart to denote whether there is a path between nodes i and j, and what
+ * their costs is, and the decoder uses charts to record the partial application of rules (DotChart})
+ * and the existence of proved items ({@link org.apache.joshua.decoder.phrase.PhraseChart}).
+ *
+ * The dummy way to implement a chart is to initialize a two-dimensional array; however, this wastes
+ * a lot of space, because the constraint (i <= j) means that only half of this space can ever be
+ * used. This is especially a problem for lattices, where the sentence length (n) is the number of
+ * nodes in the lattice!
+ *
+ * Fortunately, there is a smarter way, since there is a simple deterministic mapping between chart
+ * spans under a given maximum length. This class implements that in a generic way, introducing
+ * large savings in both space and time.
+ *
+ * @author Matt Post post@cs.jhu.edu
+ */
+public class ChartSpan<Type> {
+ Object[] chart;
+ int max;
+
+ public ChartSpan(int w, Type defaultValue) {
+ //System.err.println(String.format("ChartSpan::ChartSpan(%d)", w));
+ this.max = w;
+
+ /* offset(max,max) is the last position in the array */
+ chart = new Object[offset(max,max) + 1];
+
+ /* Initialize all arcs to infinity, except self-loops, which have distance 0 */
+ for (int i = 0; i < chart.length; i++)
+ chart[i] = defaultValue;
+ }
+
+ @SuppressWarnings("unchecked")
+ public Type get(int i, int j) {
+ return (Type) chart[offset(i, j)];
+ }
+
+ public void set(int i, int j, Type value) {
+ chart[offset(i, j)] = value;
+ }
+
+ /**
+ * This computes the offset into the one-dimensional array for a given span.
+ *
+ * @param i source node in span
+ * @param j target node in span
+ * @return the offset
+ */
+ private int offset(int i, int j) {
+ if (i < 0 || j > max || i > j) {
+ throw new RuntimeException(String.format("Invalid span (%d,%d | %d)", i, j, max));
+ }
+
+ return i * (max + 1) - i * (i + 1) / 2 + j;
+ }
+
+ /**
+ * Convenience function for setting the values along the diagonal.
+ *
+ * @param value input Type for which to set values
+ */
+ public void setDiagonal(Type value) {
+ for (int i = 0; i <= max; i++)
+ set(i, i, value);
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java b/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
new file mode 100644
index 0000000..974b973
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/CommandLineParser.java
@@ -0,0 +1,738 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Java Command Line Parser
+ * <p>
+ * The current version supports string and integer options.
+ * <p>
+ * Support is not included for options which take a list of values.
+ *
+ * @author Lane O.B. Schwartz
+ */
+@SuppressWarnings("rawtypes")
+public class CommandLineParser {
+
+ /** Integer options, keyed by their short (single-character) form. */
+ private Map<Character, Option<Integer>> intShortForms;
+ /** Integer options, keyed by their long form. */
+ private Map<String, Option<Integer>> intLongForms;
+
+ /** String options, keyed by their short (single-character) form. */
+ private Map<Character, Option<String>> stringShortForms;
+ /** String options, keyed by their long form. */
+ private Map<String, Option<String>> stringLongForms;
+
+ /** Boolean options, keyed by their short (single-character) form. */
+ private Map<Character, Option<Boolean>> booleanShortForms;
+ /** Boolean options, keyed by their long form. */
+ private Map<String, Option<Boolean>> booleanLongForms;
+
+ /** Every registered option of any type, in registration order. */
+ private List<Option> allOptions;
+
+ /** Lower-case strings that set a boolean option to true when given as its value. */
+ private final Set<String> localizedTrueStrings = new HashSet<String>();
+ /** Lower-case strings that set a boolean option to false when given as its value. */
+ private final Set<String> localizedFalseStrings = new HashSet<String>();
+
+  /**
+   * Creates a parser with no registered options that accepts "true"/"yes" and "false"/"no" as
+   * boolean option values.
+   */
+  public CommandLineParser() {
+    // Registry of every option, regardless of type.
+    allOptions = new LinkedList<Option>();
+
+    // Per-type lookup tables keyed by short form.
+    intShortForms = new HashMap<Character, Option<Integer>>();
+    stringShortForms = new HashMap<Character, Option<String>>();
+    booleanShortForms = new HashMap<Character, Option<Boolean>>();
+
+    // Per-type lookup tables keyed by long form.
+    intLongForms = new HashMap<String, Option<Integer>>();
+    stringLongForms = new HashMap<String, Option<String>>();
+    booleanLongForms = new HashMap<String, Option<Boolean>>();
+
+    // Default spellings of boolean values.
+    localizedTrueStrings.add("true");
+    localizedTrueStrings.add("yes");
+
+    localizedFalseStrings.add("false");
+    localizedFalseStrings.add("no");
+  }
+
+  /**
+   * Creates a parser with no registered options whose accepted boolean value spellings are
+   * exactly the given sets, replacing the defaults.
+   *
+   * @param localizedTrueStrings strings that set a boolean option to true; option values are
+   *        lower-cased before being looked up in this set
+   * @param localizedFalseStrings strings that set a boolean option to false; option values are
+   *        lower-cased before being looked up in this set
+   */
+  public CommandLineParser(Set<String> localizedTrueStrings, Set<String> localizedFalseStrings) {
+    this();
+
+    // Discard the defaults installed by the no-argument constructor.
+    this.localizedTrueStrings.clear();
+    this.localizedTrueStrings.addAll(localizedTrueStrings);
+
+    this.localizedFalseStrings.clear();
+    this.localizedFalseStrings.addAll(localizedFalseStrings);
+  }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ Integer defaultValue, Set<Integer> legalValues, String comment) {
+ if (shortForm != Option.MISSING_SHORT_FORM && (intShortForms.containsKey(shortForm))
+ || (!longForm.equals(Option.MISSING_LONG_FORM) && intLongForms.containsKey(longForm)))
+ throw new DuplicateOptionException("Duplicate options are not allowed");
+
+ Option<Integer> o =
+ new Option<Integer>(shortForm, longForm, valueVariable, defaultValue, legalValues, comment);
+ intShortForms.put(shortForm, o);
+ intLongForms.put(longForm, o);
+ allOptions.add(o);
+ return o;
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ Set<Integer> legalValues, String comment) {
+ return addIntegerOption(shortForm, longForm, valueVariable, null, legalValues, comment);
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ String comment) {
+ return addIntegerOption(shortForm, longForm, valueVariable, null, new UniversalSet<Integer>(),
+ comment);
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm, String comment) {
+ return addIntegerOption(shortForm, longForm, null, null, new UniversalSet<Integer>(), comment);
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ Integer defaultValue, String comment) {
+ return addIntegerOption(shortForm, longForm, valueVariable, defaultValue,
+ new UniversalSet<Integer>(), comment);
+ }
+
+ public Option<Integer> addIntegerOption(String longForm, String valueVariable,
+ Integer defaultValue, String comment) {
+ return addIntegerOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+ new UniversalSet<Integer>(), comment);
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm, String longForm) {
+ return addIntegerOption(shortForm, longForm, null, null, new UniversalSet<Integer>(), "");
+ }
+
+ public Option<Integer> addIntegerOption(char shortForm) {
+ return addIntegerOption(shortForm, Option.MISSING_LONG_FORM);
+ }
+
+ public Option<Integer> addIntegerOption(String longForm) {
+ return addIntegerOption(Option.MISSING_SHORT_FORM, longForm);
+ }
+
+ public Option<Integer> addIntegerOption(String longForm, String comment) {
+ return addIntegerOption(Option.MISSING_SHORT_FORM, longForm, comment);
+ }
+
+
+ // String options
+
+
+  /**
+   * Registers a string option.
+   * <p>
+   * A short form equal to {@code Option.MISSING_SHORT_FORM}, or a long form equal to
+   * {@code Option.MISSING_LONG_FORM}, is exempt from the duplicate check.
+   *
+   * @param shortForm single-character name of the option
+   * @param longForm long name of the option
+   * @param valueVariable forwarded to the {@code Option} constructor; may be null
+   * @param defaultValue forwarded to the {@code Option} constructor; may be null
+   * @param legalValues forwarded to the {@code Option} constructor
+   * @param comment forwarded to the {@code Option} constructor
+   * @return the newly registered option
+   * @throws DuplicateOptionException if a string option, or an integer option, with the same
+   *         short or long form is already registered
+   */
+  public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+      String defaultValue, Set<String> legalValues, String comment) {
+    // Duplicates must be looked for among the string options themselves. A clash with an
+    // integer option stays rejected as before: the parse methods consult the integer tables
+    // first, so such a string option could never receive a value.
+    if (shortForm != Option.MISSING_SHORT_FORM
+        && (stringShortForms.containsKey(shortForm) || intShortForms.containsKey(shortForm))) {
+      throw new DuplicateOptionException("Duplicate options are not allowed");
+    }
+    if (!longForm.equals(Option.MISSING_LONG_FORM)
+        && (stringLongForms.containsKey(longForm) || intLongForms.containsKey(longForm))) {
+      throw new DuplicateOptionException("Duplicate options are not allowed");
+    }
+
+    Option<String> o =
+        new Option<String>(shortForm, longForm, valueVariable, defaultValue, legalValues, comment);
+    stringShortForms.put(shortForm, o);
+    stringLongForms.put(longForm, o);
+    allOptions.add(o);
+    return o;
+  }
+
+ public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+ Set<String> legalValues, String comment) {
+ return addStringOption(shortForm, longForm, valueVariable, null, legalValues, comment);
+ }
+
+ public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+ String comment) {
+ return addStringOption(shortForm, longForm, valueVariable, null, new UniversalSet<String>(),
+ comment);
+ }
+
+ public Option<String> addStringOption(String longForm, String valueVariable, String comment) {
+ return addStringOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, null,
+ new UniversalSet<String>(), comment);
+ }
+
+ public Option<String> addStringOption(char shortForm, String longForm, String comment) {
+ return addStringOption(shortForm, longForm, null, null, new UniversalSet<String>(), comment);
+ }
+
+ public Option<String> addStringOption(char shortForm, String longForm, String valueVariable,
+ String defaultValue, String comment) {
+ return addStringOption(shortForm, longForm, valueVariable, defaultValue,
+ new UniversalSet<String>(), comment);
+ }
+
+ public Option<String> addStringOption(String longForm, String valueVariable, String defaultValue,
+ String comment) {
+ return addStringOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+ new UniversalSet<String>(), comment);
+ }
+
+ public Option<String> addStringOption(char shortForm, String longForm) {
+ return addStringOption(shortForm, longForm, null, null, new UniversalSet<String>(), "");
+ }
+
+ public Option<String> addStringOption(char shortForm) {
+ return addStringOption(shortForm, Option.MISSING_LONG_FORM);
+ }
+
+ public Option<String> addStringOption(String longForm) {
+ return addStringOption(Option.MISSING_SHORT_FORM, longForm);
+ }
+
+ public Option<String> addStringOption(String longForm, String comment) {
+ return addStringOption(Option.MISSING_SHORT_FORM, longForm, comment);
+ }
+
+
+ // boolean options
+
+ public Option<Boolean> addBooleanOption(char shortForm, String longForm, String valueVariable,
+ Boolean defaultValue, String comment) {
+ if (shortForm != Option.MISSING_SHORT_FORM && (booleanShortForms.containsKey(shortForm))
+ || (!longForm.equals(Option.MISSING_LONG_FORM) && booleanLongForms.containsKey(longForm)))
+ throw new DuplicateOptionException("Duplicate options are not allowed");
+ Set<Boolean> legalBooleanValues = new HashSet<Boolean>();
+ legalBooleanValues.add(true);
+ legalBooleanValues.add(false);
+
+ Option<Boolean> o =
+ new Option<Boolean>(shortForm, longForm, valueVariable, defaultValue, legalBooleanValues,
+ comment);
+ booleanShortForms.put(shortForm, o);
+ booleanLongForms.put(longForm, o);
+ allOptions.add(o);
+ return o;
+ }
+
+ public Option<Boolean> addBooleanOption(char shortForm, String longForm, String valueVariable,
+ String comment) {
+ return addBooleanOption(shortForm, longForm, valueVariable, null, comment);
+ }
+
+ public Option<Boolean> addBooleanOption(char shortForm, String longForm, String comment) {
+ return addBooleanOption(shortForm, longForm, null, null, comment);
+ }
+
+ public Option<Boolean> addBooleanOption(String longForm, Boolean defaultValue, String comment) {
+ return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, null, defaultValue, comment);
+ }
+
+ public Option<Boolean> addBooleanOption(String longForm, String valueVariable,
+ Boolean defaultValue, String comment) {
+ return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+ comment);
+ }
+
+ public Option<Boolean> addBooleanOption(char shortForm, String longForm) {
+ return addBooleanOption(shortForm, longForm, null, null, "");
+ }
+
+ public Option<Boolean> addBooleanOption(char shortForm) {
+ return addBooleanOption(shortForm, Option.MISSING_LONG_FORM);
+ }
+
+ public Option<Boolean> addBooleanOption(String longForm) {
+ return addBooleanOption(Option.MISSING_SHORT_FORM, longForm);
+ }
+
+ public Option<Boolean> addBooleanOption(String longForm, String comment) {
+ return addBooleanOption(Option.MISSING_SHORT_FORM, longForm, comment);
+ }
+
+
+
+ // float options
+
+
+
+ // /
+ /*
+ * public Option<Integer> addIntegerOption(char shortForm, String longForm) { if
+ * (intShortForms.containsKey(shortForm) || intLongForms.containsKey(longForm)) throw new
+ * DuplicateOptionException("Duplicate options are not allowed");
+ *
+ * Option<Integer> o = new Option<Integer>(shortForm, longForm); intShortForms.put(shortForm, o);
+ * intLongForms.put(longForm, o); allOptions.add(o);
+ *
+ * return o; }
+ *
+ * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ * int defaultValue, Set<Integer> legalValues, String comment) { if
+ * (intShortForms.containsKey(shortForm) || intLongForms.containsKey(longForm)) throw new
+ * DuplicateOptionException("Duplicate options are not allowed");
+ *
+ * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, defaultValue,
+ * comment); intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o);
+ * return o; }
+ *
+ * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ * int defaultValue, String comment) { if (intShortForms.containsKey(shortForm) ||
+ * intLongForms.containsKey(longForm)) throw new
+ * DuplicateOptionException("Duplicate options are not allowed");
+ *
+ * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, defaultValue,
+ * comment); intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o);
+ * return o; }
+ *
+ * public Option<Integer> addIntegerOption(char shortForm, String longForm, String valueVariable,
+ * String comment) { if (intShortForms.containsKey(shortForm) ||
+ * intLongForms.containsKey(longForm)) throw new
+ * DuplicateOptionException("Duplicate options are not allowed");
+ *
+ * Option<Integer> o = new Option<Integer>(shortForm, longForm, valueVariable, comment);
+ * intShortForms.put(shortForm, o); intLongForms.put(longForm, o); allOptions.add(o); return o; }
+ */
+
+ /*
+ * public Option<String> addStringOption(char shortForm, String longForm) { if
+ * (stringShortForms.containsKey(shortForm) || stringLongForms.containsKey(longForm)) throw new
+ * DuplicateOptionException("Duplicate options are not allowed");
+ *
+ * Option<String> o = new Option<String>(shortForm, longForm); stringShortForms.put(shortForm, o);
+ * stringLongForms.put(longForm, o); allOptions.add(o); return o; }
+ */
+
+  /**
+   * Parses a command line, storing each value found in its registered option.
+   * <p>
+   * Recognized shapes are {@code --name=value}, {@code --name value}, {@code --name},
+   * {@code -n value} and {@code -n}. An option that is last on the command line, or that is
+   * followed by a token beginning with "-" (other than exactly "-" or "--"), is treated as a
+   * boolean flag. Arguments that do not begin with "-" and were not consumed as a value are
+   * skipped. After all arguments are read, a missing required option ends the program through
+   * {@link #die(String)}.
+   *
+   * @param argv the command-line arguments
+   * @throws CommandLineParserException if an argument is {@code --=...} or is a single dash
+   *         followed by anything other than exactly one character; the parse methods invoked
+   *         here may throw it as well
+   */
+  public void parse(String[] argv) {
+
+    Collection<Option> seen = new HashSet<Option>();
+
+    for (int index = 0; index < argv.length; index++) {
+      String arg = argv[index];
+      boolean hasNext = index + 1 < argv.length;
+
+      if (arg.startsWith("--")) {
+        int splitPoint = arg.indexOf('=');
+        if (splitPoint == 2) {
+          // "--=..." has an empty option name.
+          throw new CommandLineParserException("Invalid option: --");
+        }
+        if (splitPoint >= 0) {
+          // --name=value
+          String option = arg.substring(2, splitPoint);
+          String value = arg.substring(splitPoint + 1);
+          seen.add(parseLongForm(option, value));
+          continue;
+        }
+
+        String option = arg.substring(2);
+        if (!hasNext) {
+          // Last argument with no value: must be a boolean option.
+          seen.add(parseLongForm(option));
+          continue;
+        }
+
+        String value = argv[index + 1];
+        boolean nextIsOption = value.startsWith("-") && !value.equals("-") && !value.equals("--");
+        if (nextIsOption) {
+          seen.add(parseLongForm(option));
+        } else {
+          seen.add(parseLongForm(option, value));
+          index++; // the value has been consumed
+        }
+      } else if (arg.startsWith("-")) {
+        String option = arg.substring(1);
+        if (option.length() != 1) {
+          throw new CommandLineParserException(arg + " is not a valid option");
+        }
+        char name = option.charAt(0);
+
+        if (!hasNext) {
+          // Last argument with no value: must be a boolean option.
+          seen.add(parseShortForm(name));
+          continue;
+        }
+
+        String value = argv[index + 1];
+        boolean nextIsOption = value.startsWith("-") && !value.equals("-") && !value.equals("--");
+        if (nextIsOption) {
+          // Must be a boolean option
+          seen.add(parseShortForm(name));
+        } else {
+          seen.add(parseShortForm(name, value));
+          index++; // the value has been consumed
+        }
+      }
+    }
+
+    for (Option candidate : allOptions) {
+      boolean missing = candidate.isRequired() && !seen.contains(candidate);
+      if (missing) {
+        die("A required option was not provided:\n " + candidate + "\n");
+      }
+    }
+
+  }
+
+  /**
+   * Writes a usage summary to standard error: a "Usage:" heading followed by one line per
+   * registered option, in registration order.
+   */
+  public void printUsage() {
+    System.err.println("Usage:");
+    Iterator<Option> options = allOptions.iterator();
+    while (options.hasNext()) {
+      System.err.println(options.next());
+    }
+  }
+
+ private void die(String error) {
+ System.err.println(error);
+ printUsage();
+ System.exit(1);
+ }
+
+ public Option parseLongForm(String key, String value) {
+
+ if (intLongForms.containsKey(key)) {
+ try {
+ Option<Integer> o = intLongForms.get(key);
+ o.setValue(Integer.valueOf(value));
+ return o;
+ } catch (NumberFormatException e) {
+ die("Option " + key + " requires an integer value.");
+ return null;
+ }
+ } else if (stringLongForms.containsKey(key)) {
+ Option<String> o = stringLongForms.get(key);
+ o.setValue(value);
+ return o;
+ } else if (booleanLongForms.containsKey(key)) {
+ Option<Boolean> o = booleanLongForms.get(key);
+
+ if (localizedTrueStrings.contains(value.toLowerCase())) {
+ o.setValue(true);
+ } else if (localizedFalseStrings.contains(value.toLowerCase())) {
+ o.setValue(false);
+ } else {
+ throw new CommandLineParserException("Invalid value \"" + value + "\" for boolean option "
+ + key);
+ }
+
+ return o;
+ } else {
+
+ throw new Error("Bug in command line parser - unexpected option type encountered for option "
+ + key);
+ }
+ }
+
+ /**
+ * Switches on the boolean option registered under the given long-form name.
+ *
+ * @param key long-form name of a boolean option
+ * @return the option, with its value set to {@code true}
+ * @throws CommandLineParserException if no boolean option has that long form
+ */
+ public Option parseLongForm(String key) {
+
+   if (!booleanLongForms.containsKey(key)) {
+     throw new CommandLineParserException("No such boolean option exists: --" + key);
+   }
+
+   Option<Boolean> flag = booleanLongForms.get(key);
+   flag.setValue(true);
+   return flag;
+ }
+
+ /**
+ * Switches on the boolean option registered under the given short-form character.
+ *
+ * @param key short-form character of a boolean option
+ * @return the option, with its value set to {@code true}
+ * @throws CommandLineParserException if no boolean option has that short form
+ */
+ public Option parseShortForm(Character key) {
+
+   if (!booleanShortForms.containsKey(key)) {
+     throw new CommandLineParserException("No such boolean option exists: -" + key);
+   }
+
+   Option<Boolean> flag = booleanShortForms.get(key);
+   flag.setValue(true);
+   return flag;
+ }
+
+ public Option parseShortForm(Character key, String value) {
+ if (intShortForms.containsKey(key)) {
+ try {
+ Option<Integer> o = intShortForms.get(key);
+ o.setValue(Integer.valueOf(value));
+ return o;
+ } catch (NumberFormatException e) {
+ die("Option " + key + " requires an integer value.");
+ return null;
+ }
+ } else if (stringShortForms.containsKey(key)) {
+ Option<String> o = stringShortForms.get(key);
+ o.setValue(value);
+ return o;
+ } else if (booleanShortForms.containsKey(key)) {
+ Option<Boolean> o = booleanShortForms.get(key);
+
+ if (localizedTrueStrings.contains(value.toLowerCase())) {
+ o.setValue(true);
+ } else if (localizedFalseStrings.contains(value.toLowerCase())) {
+ o.setValue(false);
+ } else {
+ throw new CommandLineParserException("Invalid value \"" + value + "\" for boolean option "
+ + key);
+ }
+
+ return o;
+ } else {
+ throw new Error("Bug in command line parser - unexpected option type encountered");
+ }
+ }
+
+ /*
+ * public int intValue(Option o) { if (intOptions.containsKey(o)) return intOptions.get(o); else
+ * throw new RuntimeException("No such integer option"); }
+ *
+ * public String stringValue(Option o) { if (stringOptions.containsKey(o)) return
+ * stringOptions.get(o); else throw new RuntimeException("No such string option"); }
+ */
+
+ /**
+ * Returns the value of the given option by delegating to {@code option.getValue()}.
+ *
+ * @param <OptionType> type of the option's value
+ * @param option option to query
+ * @return the value reported by the option
+ */
+ public <OptionType> OptionType getValue(Option<OptionType> option) {
+ return option.getValue();
+ }
+
+ /**
+ * Tells whether the given option has a value, by delegating to {@code option.hasValue()}.
+ *
+ * @param option option to query
+ * @return the result of {@code option.hasValue()}
+ */
+ public boolean hasValue(Option<?> option) {
+ return option.hasValue();
+ }
+
+ /**
+ * Small demonstration entry point: registers an integer option ('n' / "number"),
+ * parses the supplied arguments, and prints that option's value to standard output.
+ *
+ * @param args command-line arguments handed to {@code parse}
+ */
+ public static void main(String[] args) {
+ CommandLineParser parser = new CommandLineParser();
+ Option<Integer> n = parser.addIntegerOption('n', "number", "NUMBER", "a number to be supplied");
+
+ parser.parse(args);
+
+ // parser.printUsage();
+ System.out.println(parser.getValue(n));
+ }
+
+ /**
+ * Unchecked exception thrown by this parser, for example when an argument is not a
+ * valid option or a boolean option is given an unrecognised value.
+ */
+ @SuppressWarnings("serial")
+ public static class CommandLineParserException extends RuntimeException {
+ /**
+ * @param message detail message describing the parsing problem
+ */
+ public CommandLineParserException(String message) {
+ super(message);
+ }
+ }
+
+ /**
+ * Unchecked exception carrying a detail message.
+ * NOTE(review): presumably raised when the same option is registered twice; the
+ * throwing code is not visible in this part of the file - confirm.
+ */
+ @SuppressWarnings("serial")
+ public static class DuplicateOptionException extends RuntimeException {
+ /**
+ * @param message detail message describing the problem
+ */
+ public DuplicateOptionException(String message) {
+ super(message);
+ }
+ }
+
+ /**
+ * A single command-line option, identified by a one-character short form, a long
+ * form, or both, and optionally carrying a default value.
+ * <p>
+ * An option without a default value is considered required. Two options are equal
+ * when both their short forms and their long forms are equal.
+ *
+ * @param <OptionType> type of the value held by this option
+ */
+ public class Option<OptionType> {
+   private final char shortForm;
+   private final String longForm;
+   private final String comment;
+   private final OptionType defaultValue;
+   private final String valueVariable;
+   private final Set<OptionType> legalValues;
+
+   /** Marker used as the short form of an option that has none. */
+   public static final char MISSING_SHORT_FORM = '\u0000';
+   /** Marker used as the long form of an option that has none. */
+   public static final String MISSING_LONG_FORM = "\u0000";
+
+   /** Explicitly assigned value; {@code null} until {@link #setValue} is called. */
+   private OptionType optionValue;
+
+   /**
+    * Creates a fully specified option.
+    *
+    * @param shortForm one-character name, or {@link #MISSING_SHORT_FORM}
+    * @param longForm long name, or {@link #MISSING_LONG_FORM}; must not be {@code null}
+    * @param valueVariable placeholder shown after the long form in usage output; may be {@code null}
+    * @param defaultValue value reported when none was set; {@code null} makes the option required
+    * @param legalValues set of permitted values; shown in usage output unless it is a {@code UniversalSet}
+    * @param comment description shown in usage output; must not be {@code null}
+    * @throws NullPointerException if {@code longForm} or {@code comment} is {@code null}
+    */
+   public Option(char shortForm, String longForm, String valueVariable, OptionType defaultValue,
+       Set<OptionType> legalValues, String comment) {
+
+     if (longForm == null) throw new NullPointerException("longForm must not be null");
+
+     if (comment == null) throw new NullPointerException("comment must not be null");
+
+     this.shortForm = shortForm;
+     this.longForm = longForm;
+     this.comment = comment;
+     this.valueVariable = valueVariable;
+     this.defaultValue = defaultValue;
+     this.legalValues = legalValues;
+     this.optionValue = null;
+   }
+
+   /** Creates a required option restricted to the given legal values. */
+   public Option(char shortForm, String longForm, String valueVariable,
+       Set<OptionType> legalValues, String comment) {
+     this(shortForm, longForm, valueVariable, null, legalValues, comment);
+   }
+
+
+   /** Creates a required option with a value placeholder and no restriction on values. */
+   public Option(char shortForm, String longForm, String valueVariable, String comment) {
+     this(shortForm, longForm, valueVariable, null, new UniversalSet<OptionType>(), comment);
+   }
+
+   /** Creates a required option with a description and no value placeholder. */
+   public Option(char shortForm, String longForm, String comment) {
+     this(shortForm, longForm, null, null, new UniversalSet<OptionType>(), comment);
+   }
+
+   /** Creates an option with a default value and no restriction on values. */
+   public Option(char shortForm, String longForm, String valueVariable, OptionType defaultValue,
+       String comment) {
+     this(shortForm, longForm, valueVariable, defaultValue, new UniversalSet<OptionType>(),
+         comment);
+   }
+
+   /** Creates a long-form-only option with a default value. */
+   public Option(String longForm, String valueVariable, OptionType defaultValue, String comment) {
+     this(MISSING_SHORT_FORM, longForm, valueVariable, defaultValue,
+         new UniversalSet<OptionType>(), comment);
+   }
+
+   /** Creates a required option with an empty description. */
+   public Option(char shortForm, String longForm) {
+     this(shortForm, longForm, null, null, new UniversalSet<OptionType>(), "");
+   }
+
+   /** Creates a required short-form-only option. */
+   public Option(char shortForm) {
+     this(shortForm, MISSING_LONG_FORM);
+   }
+
+   /** Creates a required long-form-only option. */
+   public Option(String longForm) {
+     this(MISSING_SHORT_FORM, longForm);
+   }
+
+   /** Creates a required long-form-only option with a description. */
+   public Option(String longForm, String comment) {
+     this(MISSING_SHORT_FORM, longForm, comment);
+   }
+
+   /**
+    * @return {@code true} if this option has a default value
+    */
+   public boolean isOptional() {
+     return (null != defaultValue);
+   }
+
+   /**
+    * @return {@code true} if this option has no default value
+    */
+   public boolean isRequired() {
+     return (null == defaultValue);
+   }
+
+   public char getShortForm() {
+     return shortForm;
+   }
+
+   public String getLongForm() {
+     return longForm;
+   }
+
+   public String getComment() {
+     return comment;
+   }
+
+   /** Stores the explicitly supplied value; legal values are not checked here. */
+   void setValue(OptionType value) {
+     this.optionValue = value;
+   }
+
+   /**
+    * Returns the explicitly set value if there is one, otherwise the default value.
+    *
+    * @return the effective value of this option
+    * @throws CommandLineParserException if neither a value nor a default is available
+    */
+   OptionType getValue() {
+     if (optionValue != null) {
+       return optionValue;
+     } else if (defaultValue != null) {
+       return defaultValue;
+     } else {
+       throw new CommandLineParserException(
+           "Unable to get value because option has not been initialized and does not have a default value: "
+               + this.toString());
+     }
+   }
+
+   /**
+    * @return {@code true} if a value was set or a default value exists
+    */
+   boolean hasValue() {
+     return !(null == optionValue && null == defaultValue);
+   }
+
+   /**
+    * Formats this option as one line of usage output: short form, long form (with the
+    * value placeholder, if any), the description, and the legal values when restricted.
+    */
+   @Override
+   public String toString() {
+
+     String formattedShortForm;
+     if (shortForm == Option.MISSING_SHORT_FORM) {
+       formattedShortForm = "";
+     } else {
+       formattedShortForm = "-" + shortForm;
+     }
+
+     String formattedLongForm;
+     if (longForm.equals(Option.MISSING_LONG_FORM)) {
+       formattedLongForm = "";
+     } else {
+       formattedLongForm = "--" + longForm;
+     }
+
+     // Separate the two forms with a comma only when both are present.
+     if (shortForm != Option.MISSING_SHORT_FORM && !longForm.equals(Option.MISSING_LONG_FORM)) {
+       formattedShortForm += ",";
+     }
+
+     if (valueVariable != null && valueVariable.length() >= 1) {
+       formattedLongForm += "=" + valueVariable;
+     }
+
+     String string = String.format(" %1$3s %2$-21s", formattedShortForm, formattedLongForm);
+
+     if (null != comment) {
+       string += " " + comment;
+     }
+
+     if (!(legalValues instanceof UniversalSet)) {
+       string += " " + legalValues;
+     }
+
+     return string;
+   }
+
+   /**
+    * Two options are equal when their short forms match and their long forms have the
+    * same content. The long forms are compared with {@code String.equals} rather than
+    * by reference, which keeps this method consistent with {@link #hashCode()}.
+    */
+   @Override
+   public boolean equals(Object o) {
+     if (this == o) {
+       return true;
+     }
+     if (!(o instanceof Option)) {
+       return false;
+     }
+     Option<?> other = (Option<?>) o;
+     return shortForm == other.shortForm && longForm.equals(other.longForm);
+   }
+
+   @Override
+   public int hashCode() {
+     return (shortForm + longForm).hashCode();
+   }
+ }
+
+ /**
+ * A read-only {@link Set} that reports every object as a member.
+ * <p>
+ * Membership queries always succeed, the set is never empty, and its size is reported
+ * as {@code Integer.MAX_VALUE}. Because the members cannot be listed, every mutating
+ * operation and every operation that would enumerate the elements throws
+ * {@link UnsupportedOperationException} instead of returning {@code null}.
+ *
+ * @param <E> nominal element type
+ */
+ static class UniversalSet<E> implements Set<E> {
+
+   @Override
+   public boolean add(E e) {
+     throw new UnsupportedOperationException();
+   }
+
+   @Override
+   public boolean addAll(Collection<? extends E> c) {
+     throw new UnsupportedOperationException();
+   }
+
+   @Override
+   public void clear() {
+     throw new UnsupportedOperationException();
+   }
+
+   /** Always {@code true}: everything belongs to the universal set. */
+   @Override
+   public boolean contains(Object o) {
+     return true;
+   }
+
+   /** Always {@code true}: every collection is a subset of the universal set. */
+   @Override
+   public boolean containsAll(Collection<?> c) {
+     return true;
+   }
+
+   @Override
+   public boolean isEmpty() {
+     return false;
+   }
+
+   /**
+    * Not supported, because the elements of a universal set cannot be enumerated.
+    *
+    * @throws UnsupportedOperationException always
+    */
+   @Override
+   public Iterator<E> iterator() {
+     throw new UnsupportedOperationException("A universal set cannot be enumerated");
+   }
+
+   @Override
+   public boolean remove(Object o) {
+     throw new UnsupportedOperationException();
+   }
+
+   @Override
+   public boolean removeAll(Collection<?> c) {
+     throw new UnsupportedOperationException();
+   }
+
+   @Override
+   public boolean retainAll(Collection<?> c) {
+     throw new UnsupportedOperationException();
+   }
+
+   /** Reports the largest size a {@code Set} can express. */
+   @Override
+   public int size() {
+     return Integer.MAX_VALUE;
+   }
+
+   /**
+    * Not supported, because the elements of a universal set cannot be enumerated.
+    *
+    * @throws UnsupportedOperationException always
+    */
+   @Override
+   public Object[] toArray() {
+     throw new UnsupportedOperationException("A universal set cannot be enumerated");
+   }
+
+   /**
+    * Not supported, because the elements of a universal set cannot be enumerated.
+    *
+    * @throws UnsupportedOperationException always
+    */
+   @Override
+   public <T> T[] toArray(T[] a) {
+     throw new UnsupportedOperationException("A universal set cannot be enumerated");
+   }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Constants.java b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
new file mode 100644
index 0000000..3d4139d
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Constants.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+/**
+ * One day, all constants should be moved here (many are in Vocabulary).
+ *
+ * @author Matt Post post@cs.jhu.edu
+ */
+
+public final class Constants {
+ // Not final, so it can be reassigned at runtime.
+ // NOTE(review): presumably the default nonterminal symbol - confirm against callers.
+ public static String defaultNT = "[X]";
+
+ // NOTE(review): presumably the sentence-start, sentence-end and unknown-word
+ // tokens - confirm against callers.
+ public static final String START_SYM = "<s>";
+ public static final String STOP_SYM = "</s>";
+ public static final String UNKNOWN_WORD = "<unk>";
+
+ // In regular-expression syntax: one whitespace character, three '|' characters,
+ // one whitespace character.
+ public static final String fieldDelimiter = "\\s\\|{3}\\s";
+ // In regular-expression syntax: one or more whitespace characters.
+ public static final String spaceSeparator = "\\s+";
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Counted.java b/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
new file mode 100644
index 0000000..9f719b3
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Counted.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.util.Comparator;
+
+/**
+ * Pairs an element with the number of times it has been counted.
+ * <p>
+ * Instances are ordered by count alone; the element plays no part in comparisons.
+ *
+ * @param <E> type of the element being counted
+ * @author Lane Schwartz
+ */
+public class Counted<E> implements Comparable<Counted<E>> {
+
+  /** The counted element. */
+  private final E element;
+
+  /** How many times the element was counted. */
+  private final Integer count;
+
+  /**
+   * Wraps an element together with its count.
+   *
+   * @param element the element that was counted
+   * @param count the number of times it was counted
+   */
+  public Counted(E element, int count) {
+    this.count = count;
+    this.element = element;
+  }
+
+  /**
+   * Returns the count stored for the element.
+   *
+   * @return the count stored for the element
+   */
+  public int getCount() {
+    return this.count;
+  }
+
+  /**
+   * Returns the element that was counted.
+   *
+   * @return the element that was counted
+   */
+  public E getElement() {
+    return this.element;
+  }
+
+  /**
+   * Orders this object relative to another by the natural (ascending) order of their counts.
+   *
+   * @param o the counted object to compare against
+   * @return a negative value, zero, or a positive value when this count is respectively
+   *         smaller than, equal to, or greater than the other count
+   */
+  public int compareTo(Counted<E> o) {
+    return this.count.compareTo(o.count);
+  }
+
+  /**
+   * Creates a comparator that orders counted objects from the highest count to the lowest.
+   *
+   * @param <E> type of the element being counted
+   * @return a comparator implementing the reverse of the natural order of the counts
+   */
+  public static <E> Comparator<Counted<E>> getDescendingComparator() {
+    return new DescendingByCount<E>();
+  }
+
+  /** Compares counted objects by count, largest first. */
+  private static final class DescendingByCount<T> implements Comparator<Counted<T>> {
+    public int compare(Counted<T> o1, Counted<T> o2) {
+      return o2.count.compareTo(o1.count);
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/util/Counts.java b/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
new file mode 100644
index 0000000..89a9f38
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/util/Counts.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * Maintains element co-occurrence data.
+ * <p>
+ * Joint counts of (A, B) pairs and marginal totals for B are kept exactly. Relative
+ * frequency estimates derived from them are cached on first request and discarded
+ * whenever a count changes.
+ *
+ * @param <A> type of the first element of a pair
+ * @param <B> type of the second element of a pair
+ * @author Lane Schwartz
+ * @author Chris Callison-Burch
+ */
+public class Counts<A, B> implements Iterable<Pair<A, B>> {
+
+  /**
+   * Stores the number of times instances of A and B co-occur.
+   */
+  private Map<A, Map<B, Integer>> counts;
+
+  /** Stores the number of times instances of B occur. */
+  private Map<B, Integer> bTotals;
+
+  /** Caches relative frequency estimates for p(A | B). */
+  private Map<A, Map<B, Float>> probabilities;
+
+  /** Caches relative frequency estimates for p(B | A). */
+  private Map<B, Map<A, Float>> reverseProbabilities;
+
+  /** Stores the value to return when an unseen pair is queried. */
+  private float floorProbability;
+
+  /**
+   * Constructs an initially empty co-occurrence counter, with floor probability set to
+   * <code>Float.MIN_VALUE</code>.
+   */
+  public Counts() {
+    this(Float.MIN_VALUE);
+  }
+
+  /**
+   * Constructs an initially empty co-occurrence counter.
+   *
+   * @param floorProbability Floor probability to use when an unseen pair is queried.
+   */
+  public Counts(float floorProbability) {
+    this.floorProbability = floorProbability;
+    this.counts = new HashMap<A, Map<B, Integer>>();
+    this.bTotals = new HashMap<B, Integer>();
+    this.probabilities = new HashMap<A, Map<B, Float>>();
+    this.reverseProbabilities = new HashMap<B, Map<A, Float>>();
+  }
+
+
+  /**
+   * Increments the co-occurrence count of the provided objects.
+   *
+   * @param a input object A
+   * @param b input object B
+   */
+  public void incrementCount(A a, B b) {
+    // Bump the joint count, creating the inner map the first time a is seen.
+    Map<B, Integer> bMap = counts.get(a);
+    if (bMap == null) {
+      bMap = new HashMap<B, Integer>();
+      counts.put(a, bMap);
+    }
+    Integer previousCount = bMap.get(b);
+    bMap.put(b, previousCount == null ? 1 : previousCount + 1);
+
+    // Bump the marginal total for b.
+    Integer previousTotal = bTotals.get(b);
+    bTotals.put(b, previousTotal == null ? 1 : previousTotal + 1);
+
+    // Invalidate cached estimates. This increment changes the denominator of p(x | b)
+    // for every x and of p(y | a) for every y, so dropping only the entries keyed by
+    // this exact pair would leave stale values behind; both caches are emptied instead.
+    if (!probabilities.isEmpty()) {
+      probabilities.clear();
+    }
+    if (!reverseProbabilities.isEmpty()) {
+      reverseProbabilities.clear();
+    }
+  }
+
+  /**
+   * Gets the co-occurrence count for the two elements.
+   *
+   * @param a input object A
+   * @param b input object B
+   * @return the co-occurrence count for the two elements, or 0 if the pair was never counted
+   */
+  public int getCount(A a, B b) {
+    Map<B, Integer> bMap = counts.get(a);
+    if (bMap == null) {
+      return 0;
+    }
+    Integer count = bMap.get(b);
+    return (count == null) ? 0 : count;
+  }
+
+  /**
+   * Gets the total number of times the specified element has been seen.
+   *
+   * @param b input object B
+   * @return the total number of times the specified element has been seen
+   */
+  int getCount(B b) {
+    Integer total = bTotals.get(b);
+    return (total == null) ? 0 : total;
+  }
+
+  /**
+   * Gets the probability of a given b.
+   * <p>
+   * This value is the relative frequency estimate count(a, b) / count(b). The floor
+   * probability is returned when either count is zero.
+   *
+   * @param a object A
+   * @param b object B
+   * @return the probability of a given b.
+   */
+  public float getProbability(A a, B b) {
+
+    int count = getCount(a, b);
+    int bCount = getCount(b);
+
+    if (count == 0 || bCount == 0) {
+      return floorProbability;
+    }
+
+    // Register the per-a cache map so that the computed estimate is actually retained.
+    Map<B, Float> bMap = probabilities.get(a);
+    if (bMap == null) {
+      bMap = new HashMap<B, Float>();
+      probabilities.put(a, bMap);
+    }
+
+    Float value = bMap.get(b);
+    if (value == null) {
+      value = (float) count / (float) bCount;
+      bMap.put(b, value);
+    }
+
+    return value;
+  }
+
+  /**
+   * Gets the probability of b given a.
+   * <p>
+   * This value is the relative frequency estimate in the reverse direction: count(a, b)
+   * divided by the sum of the counts of all pairs whose first element is a. The floor
+   * probability is returned when the pair was never counted.
+   *
+   * @param b object B
+   * @param a object A
+   * @return the probability of b given a.
+   */
+  public float getReverseProbability(B b, A a) {
+
+    int count = getCount(a, b);
+    if (count <= 0) {
+      return floorProbability;
+    }
+
+    // Look in the cache before summing: the sum over all partners of a is the costly step.
+    Map<A, Float> aMap = reverseProbabilities.get(b);
+    if (aMap != null) {
+      Float cached = aMap.get(a);
+      if (cached != null) {
+        return cached;
+      }
+    }
+
+    int aCount = 0;
+    for (Integer aValue : counts.get(a).values()) {
+      aCount += aValue;
+    }
+
+    // The sum can only be non-positive if it overflowed; fall back to the floor then.
+    if (aCount <= 0) {
+      return floorProbability;
+    }
+
+    float value = (float) count / (float) aCount;
+    if (aMap == null) {
+      aMap = new HashMap<A, Float>();
+      reverseProbabilities.put(b, aMap);
+    }
+    aMap.put(a, value);
+
+    return value;
+  }
+
+  /**
+   * Gets the floor probability that is returned whenever an unseen pair is queried.
+   *
+   * @return The floor probability that is returned whenever an unseen pair is queried
+   */
+  public float getFloorProbability() {
+    return this.floorProbability;
+  }
+
+  /**
+   * Writes the counts, totals, cached estimates and floor probability to the given output,
+   * in that order. The output is not closed.
+   *
+   * @param out destination to write to
+   * @throws IOException if writing fails
+   */
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeObject(counts);
+    out.writeObject(bTotals);
+    out.writeObject(probabilities);
+    out.writeObject(reverseProbabilities);
+    out.writeFloat(floorProbability);
+    // out.close();
+  }
+
+  /**
+   * Replaces the state of this object with data read from the given input, in the order
+   * used by {@link #writeExternal(ObjectOutput)}.
+   *
+   * @param in source to read from
+   * @throws ClassNotFoundException if the class of a serialized object cannot be found
+   * @throws IOException if reading fails
+   */
+  @SuppressWarnings("unchecked")
+  public void readExternal(ObjectInput in) throws ClassNotFoundException, IOException {
+    this.counts = (Map<A, Map<B, Integer>>) in.readObject();
+    this.bTotals = (Map<B, Integer>) in.readObject();
+    this.probabilities = (Map<A, Map<B, Float>>) in.readObject();
+    this.reverseProbabilities = (Map<B, Map<A, Float>>) in.readObject();
+    this.floorProbability = in.readFloat();
+  }
+
+  /**
+   * Gets an iterator over all counted pairs.
+   * <p>
+   * The pairs are not guaranteed to be iterated over in any particular order.
+   *
+   * @return an iterator over all counted pairs
+   */
+  public Iterator<Pair<A, B>> iterator() {
+
+    final Iterator<Entry<A, Map<B, Integer>>> aIterator = counts.entrySet().iterator();
+
+    return new Iterator<Pair<A, B>>() {
+
+      /** Entry of the outer map whose partners are currently being walked. */
+      Entry<A, Map<B, Integer>> entry = null;
+      /** Iterator over the partners of the current entry. */
+      Iterator<B> bIterator = null;
+
+      public boolean hasNext() {
+        return (bIterator != null && bIterator.hasNext()) || aIterator.hasNext();
+      }
+
+      public Pair<A, B> next() {
+        // Move on to the next outer entry once the current one is exhausted.
+        if (bIterator == null || !bIterator.hasNext()) {
+          entry = aIterator.next();
+          bIterator = entry.getValue().keySet().iterator();
+        }
+
+        return new Pair<A, B>(entry.getKey(), bIterator.next());
+      }
+
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+
+    };
+  }
+
+}