You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:18 UTC
[02/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it
possible to use Maven to build Joshua
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/tools/GrammarPackerCli.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/GrammarPackerCli.java b/src/main/java/org/apache/joshua/tools/GrammarPackerCli.java
new file mode 100644
index 0000000..eef65bb
--- /dev/null
+++ b/src/main/java/org/apache/joshua/tools/GrammarPackerCli.java
@@ -0,0 +1,155 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.StringArrayOptionHandler;
+
+public class GrammarPackerCli {
+
+ private static final Logger log = Logger.getLogger(GrammarPackerCli.class.getName());
+
+ // Input grammars to be packed (with a joint vocabulary)
+ @Option(name = "--grammars", aliases = {"-g", "-i"}, handler = StringArrayOptionHandler.class, required = true, usage = "list of grammars to pack (jointly, i.e. they share the same vocabulary)")
+ private List<String> grammars = new ArrayList<>();
+
+ // Output grammars
+ @Option(name = "--outputs", aliases = {"-p", "-o"}, handler = StringArrayOptionHandler.class, required = true, usage = "output directories of packed grammars.")
+ private List<String> outputs = new ArrayList<>();
+
+ // Output grammars
+ @Option(name = "--alignments", aliases = {"-a", "--fa"}, handler = StringArrayOptionHandler.class, required = false, usage = "alignment files")
+ private List<String> alignments_filenames = new ArrayList<>();
+
+ // Config filename
+ @Option(name = "--config_file", aliases = {"-c"}, required = false, usage = "(optional) packing configuration file")
+ private String config_filename;
+
+ @Option(name = "--dump_files", aliases = {"-d"}, handler = StringArrayOptionHandler.class, usage = "(optional) dump feature stats to file")
+ private List<String> featuredump_filenames = new ArrayList<>();
+
+ @Option(name = "--ga", usage = "whether alignments are present in the grammar")
+ private boolean grammar_alignments = false;
+
+ @Option(name = "--slice_size", aliases = {"-s"}, required = false, usage = "approximate slice size in # of rules (default=1000000)")
+ private int slice_size = 1000000;
+
+
+ private void run() throws IOException {
+
+ final List<String> missingFilenames = new ArrayList<>(grammars.size());
+ for (final String g : grammars) {
+ if (!new File(g).exists()) {
+ missingFilenames.add(g);
+ }
+ }
+ if (!missingFilenames.isEmpty()) {
+ throw new IOException("Input grammar files not found: " + missingFilenames.toString());
+ }
+
+ if (config_filename != null && !new File(config_filename).exists()) {
+ throw new IOException("Config file not found: " + config_filename);
+ }
+
+ if (!outputs.isEmpty()) {
+ if (outputs.size() != grammars.size()) {
+ throw new IOException("Must provide an output directory for each grammar");
+ }
+ final List<String> existingOutputs = new ArrayList<>(outputs.size());
+ for (final String o : outputs) {
+ if (new File(o).exists()) {
+ existingOutputs.add(o);
+ }
+ }
+ if (!existingOutputs.isEmpty()) {
+ throw new IOException("These output directories already exist (will not overwrite): " + existingOutputs.toString());
+ }
+ }
+ if (outputs.isEmpty()) {
+ for (final String g : grammars) {
+ outputs.add(g + ".packed");
+ }
+ }
+
+ if (!alignments_filenames.isEmpty()) {
+ final List<String> missingAlignmentFiles = new ArrayList<>(alignments_filenames.size());
+ for (final String a : alignments_filenames) {
+ if (!new File(a).exists()) {
+ missingAlignmentFiles.add(a);
+ }
+ }
+ if (!missingAlignmentFiles.isEmpty()) {
+ throw new IOException("Alignment files not found: " + missingAlignmentFiles.toString());
+ }
+ }
+
+ // create Packer instances for each grammar
+ final List<GrammarPacker> packers = new ArrayList<>(grammars.size());
+ for (int i = 0; i < grammars.size(); i++) {
+ log.info("Starting GrammarPacker for " + grammars.get(i));
+ final String alignment_filename = alignments_filenames.isEmpty() ? null : alignments_filenames.get(i);
+ final String featuredump_filename = featuredump_filenames.isEmpty() ? null : featuredump_filenames.get(i);
+ final GrammarPacker packer = new GrammarPacker(
+ grammars.get(i),
+ config_filename,
+ outputs.get(i),
+ alignment_filename,
+ featuredump_filename,
+ grammar_alignments,
+ slice_size);
+ packers.add(packer);
+ }
+
+ // run all packers in sequence, accumulating vocabulary items
+ for (final GrammarPacker packer : packers) {
+ log.info("Starting GrammarPacker for " + packer.getGrammar());
+ packer.pack();
+ log.info("PackedGrammar located at " + packer.getOutputDirectory());
+ }
+
+ // for each packed grammar, overwrite the internally serialized vocabulary with the current global one.
+ for (final GrammarPacker packer : packers) {
+ log.info("Writing final common Vocabulary to " + packer.getOutputDirectory());
+ packer.writeVocabulary();
+ }
+ }
+
+ public static void main(String[] args) throws IOException {
+ final GrammarPackerCli cli = new GrammarPackerCli();
+ final CmdLineParser parser = new CmdLineParser(cli);
+
+ try {
+ parser.parseArgument(args);
+ cli.run();
+ } catch (CmdLineException e) {
+ log.info(e.toString());
+ parser.printUsage(System.err);
+ System.exit(1);
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/tools/LabelPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/LabelPhrases.java b/src/main/java/org/apache/joshua/tools/LabelPhrases.java
new file mode 100644
index 0000000..9733672
--- /dev/null
+++ b/src/main/java/org/apache/joshua/tools/LabelPhrases.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.tools;
+
+import java.io.IOException;
+import java.util.logging.Logger;
+
+import joshua.corpus.Vocabulary;
+import joshua.corpus.syntax.ArraySyntaxTree;
+import joshua.util.io.LineReader;
+
+/**
+ * Finds labeling for a set of phrases.
+ *
+ * @author Juri Ganitkevitch
+ */
+public class LabelPhrases {
+
+ /** Logger for this class. */
+ private static final Logger logger = Logger.getLogger(LabelPhrases.class.getName());
+
+ /**
+ * Main method.
+ *
+ * @param args names of the two grammars to be compared
+ * @throws IOException
+ * @throws NumberFormatException
+ */
+ public static void main(String[] args) throws NumberFormatException, IOException {
+
+ if (args.length < 1 || args[0].equals("-h")) {
+ System.err.println("Usage: " + LabelPhrases.class.toString());
+ System.err.println(" -p phrase_file phrase-sentence file to process");
+ System.err.println();
+ System.exit(-1);
+ }
+
+ String phrase_file_name = null;
+
+ for (int i = 0; i < args.length; i++) {
+ if ("-p".equals(args[i])) phrase_file_name = args[++i];
+ }
+ if (phrase_file_name == null) {
+ logger.severe("a phrase file is required for operation");
+ System.exit(-1);
+ }
+
+ LineReader phrase_reader = new LineReader(phrase_file_name);
+
+ while (phrase_reader.ready()) {
+ String line = phrase_reader.readLine();
+
+ String[] fields = line.split("\\t");
+ if (fields.length != 3 || fields[2].equals("()")) {
+ System.err.println("[FAIL] Empty parse in line:\t" + line);
+ continue;
+ }
+
+ String[] phrase_strings = fields[0].split("\\s");
+ int[] phrase_ids = new int[phrase_strings.length];
+ for (int i = 0; i < phrase_strings.length; i++)
+ phrase_ids[i] = Vocabulary.id(phrase_strings[i]);
+
+ ArraySyntaxTree syntax = new ArraySyntaxTree(fields[2]);
+ int[] sentence_ids = syntax.getTerminals();
+
+ int match_start = -1;
+ int match_end = -1;
+ for (int i = 0; i < sentence_ids.length; i++) {
+ if (phrase_ids[0] == sentence_ids[i]) {
+ match_start = i;
+ int j = 0;
+ while (j < phrase_ids.length && phrase_ids[j] == sentence_ids[i + j]) {
+ j++;
+ }
+ if (j == phrase_ids.length) {
+ match_end = i + j;
+ break;
+ }
+ }
+ }
+
+ int label = syntax.getOneConstituent(match_start, match_end);
+ if (label == 0) label = syntax.getOneSingleConcatenation(match_start, match_end);
+ if (label == 0) label = syntax.getOneRightSideCCG(match_start, match_end);
+ if (label == 0) label = syntax.getOneLeftSideCCG(match_start, match_end);
+ if (label == 0) label = syntax.getOneDoubleConcatenation(match_start, match_end);
+ if (label == 0) {
+ System.err.println("[FAIL] No label found in line:\t" + line);
+ continue;
+ }
+
+ System.out.println(Vocabulary.word(label) + "\t" + line);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/tools/TestSetFilter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/TestSetFilter.java b/src/main/java/org/apache/joshua/tools/TestSetFilter.java
new file mode 100644
index 0000000..06cea5f
--- /dev/null
+++ b/src/main/java/org/apache/joshua/tools/TestSetFilter.java
@@ -0,0 +1,376 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.tools;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import joshua.util.io.LineReader;
+
+public class TestSetFilter {
+ private Filter filter = null;
+
+ // for caching of accepted rules
+ private String lastSourceSide;
+ private boolean acceptedLastSourceSide;
+
+ public int cached = 0;
+ public int RULE_LENGTH = 12;
+ public boolean verbose = false;
+ public boolean parallel = false;
+
+ private static final String DELIMITER = "|||";
+ private static final String DELIMITER_REGEX = " \\|\\|\\| ";
+ public static final String DELIM = String.format(" %s ", DELIMITER);
+ public static final Pattern P_DELIM = Pattern.compile(DELIMITER_REGEX);
+ private final String NT_REGEX = "\\[[^\\]]+?\\]";
+
+ public TestSetFilter() {
+ acceptedLastSourceSide = false;
+ lastSourceSide = null;
+ }
+
+ public String getFilterName() {
+ if (filter != null)
+ if (filter instanceof FastFilter)
+ return "fast";
+ else if (filter instanceof LooseFilter)
+ return "loose";
+ else
+ return "exact";
+ return "null";
+ }
+
+ public void setVerbose(boolean value) {
+ verbose = value;
+ }
+
+ public void setParallel(boolean value) {
+ parallel = value;
+ }
+
+ public void setFilter(String type) {
+ if (type.equals("fast"))
+ filter = new FastFilter();
+ else if (type.equals("exact"))
+ filter = new ExactFilter();
+ else if (type.equals("loose"))
+ filter = new LooseFilter();
+ else
+ throw new RuntimeException(String.format("Invalid filter type '%s'", type));
+ }
+
+ public void setRuleLength(int value) {
+ RULE_LENGTH = value;
+ }
+
+ private void loadTestSentences(String filename) throws IOException {
+ int count = 0;
+
+ try {
+ for (String line: new LineReader(filename)) {
+ filter.addSentence(line);
+ count++;
+ }
+ } catch (FileNotFoundException e) {
+ System.err.printf("Could not open %s\n", e.getMessage());
+ }
+
+ if (verbose)
+ System.err.println(String.format("Added %d sentences.\n", count));
+ }
+
+ /**
+ * Top-level filter, responsible for calling the fast or exact version. Takes the source side
+ * of a rule and determines whether there is any sentence in the test set that can match it.
+ */
+ public boolean inTestSet(String sourceSide) {
+ if (!sourceSide.equals(lastSourceSide)) {
+ lastSourceSide = sourceSide;
+ acceptedLastSourceSide = filter.permits(sourceSide);
+ } else {
+ cached++;
+ }
+
+ return acceptedLastSourceSide;
+ }
+
+ /**
+ * Determines whether a rule is an abstract rule. An abstract rule is one that has no terminals on
+ * its source side.
+ *
+ * If the rule is abstract, the rule's arity is returned. Otherwise, 0 is returned.
+ */
+ private boolean isAbstract(String source) {
+ int nonterminalCount = 0;
+ for (String t : source.split("\\s+")) {
+ if (!t.matches(NT_REGEX))
+ return false;
+ nonterminalCount++;
+ }
+ return nonterminalCount != 0;
+ }
+
+ private interface Filter {
+ /* Tell the filter about a sentence in the test set being filtered to */
+ public void addSentence(String sentence);
+
+ /* Returns true if the filter permits the specified source side */
+ public boolean permits(String sourceSide);
+ }
+
+ private class FastFilter implements Filter {
+ private Set<String> ngrams = null;
+
+ public FastFilter() {
+ ngrams = new HashSet<String>();
+ }
+
+ @Override
+ public boolean permits(String source) {
+ for (String chunk : source.split(NT_REGEX)) {
+ chunk = chunk.trim();
+ /* Important: you need to make sure the string isn't empty. */
+ if (!chunk.equals("") && !ngrams.contains(chunk))
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public void addSentence(String sentence) {
+ String[] tokens = sentence.trim().split("\\s+");
+ int maxOrder = RULE_LENGTH < tokens.length ? RULE_LENGTH : tokens.length;
+ for (int order = 1; order <= maxOrder; order++) {
+ for (int start = 0; start < tokens.length - order + 1; start++)
+ ngrams.add(createNGram(tokens, start, order));
+ }
+ }
+
+ private String createNGram(String[] tokens, int start, int order) {
+ if (order < 1 || start + order > tokens.length) {
+ return "";
+ }
+ String result = tokens[start];
+ for (int i = 1; i < order; i++)
+ result += " " + tokens[start + i];
+ return result;
+ }
+ }
+
+ private class LooseFilter implements Filter {
+ List<String> testSentences = null;
+
+ public LooseFilter() {
+ testSentences = new ArrayList<String>();
+ }
+
+ @Override
+ public void addSentence(String source) {
+ testSentences.add(source);
+ }
+
+ @Override
+ public boolean permits(String source) {
+ Pattern pattern = getPattern(source);
+ for (String testSentence : testSentences) {
+ if (pattern.matcher(testSentence).find()) {
+ return true;
+ }
+ }
+ return isAbstract(source);
+ }
+
+ protected Pattern getPattern(String source) {
+ String pattern = source;
+ pattern = pattern.replaceAll(String.format("\\s*%s\\s*", NT_REGEX), ".+");
+ pattern = pattern.replaceAll("\\s+", ".*");
+// System.err.println(String.format("PATTERN(%s) = %s", source, pattern));
+ return Pattern.compile(pattern);
+ }
+ }
+
+ /**
+ * This class is the same as LooseFilter except with a tighter regex for matching rules.
+ */
+ private class ExactFilter implements Filter {
+ private FastFilter fastFilter = null;
+ private Map<String, Set<Integer>> sentencesByWord;
+ List<String> testSentences = null;
+
+ public ExactFilter() {
+ fastFilter = new FastFilter();
+ sentencesByWord = new HashMap<String, Set<Integer>>();
+ testSentences = new ArrayList<String>();
+ }
+
+ @Override
+ public void addSentence(String source) {
+ fastFilter.addSentence(source);
+ addSentenceToWordHash(source, testSentences.size());
+ testSentences.add(source);
+ }
+
+ /**
+ * Always permit abstract rules. Otherwise, query the fast filter, and if that passes, apply
+ *
+ */
+ @Override
+ public boolean permits(String sourceSide) {
+ if (isAbstract(sourceSide))
+ return true;
+
+ if (fastFilter.permits(sourceSide)) {
+ Pattern pattern = getPattern(sourceSide);
+ for (int i : getSentencesForRule(sourceSide)) {
+ if (pattern.matcher(testSentences.get(i)).find()) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ protected Pattern getPattern(String source) {
+ String pattern = Pattern.quote(source);
+ pattern = pattern.replaceAll(NT_REGEX, "\\\\E.+\\\\Q");
+ pattern = pattern.replaceAll("\\\\Q\\\\E", "");
+ pattern = "(?:^|\\s)" + pattern + "(?:$|\\s)";
+ return Pattern.compile(pattern);
+ }
+
+ /*
+ * Map words to all the sentences they appear in.
+ */
+ private void addSentenceToWordHash(String sentence, int index) {
+ String[] tokens = sentence.split("\\s+");
+ for (String t : tokens) {
+ if (! sentencesByWord.containsKey(t))
+ sentencesByWord.put(t, new HashSet<Integer>());
+ sentencesByWord.get(t).add(index);
+ }
+ }
+
+ private Set<Integer> getSentencesForRule(String source) {
+ Set<Integer> sentences = null;
+ for (String token : source.split("\\s+")) {
+ if (!token.matches(NT_REGEX)) {
+ if (sentencesByWord.containsKey(token)) {
+ if (sentences == null)
+ sentences = new HashSet<Integer>(sentencesByWord.get(token));
+ else
+ sentences.retainAll(sentencesByWord.get(token));
+ }
+ }
+ }
+
+ return sentences;
+ }
+ }
+
+ public static void main(String[] argv) throws IOException {
+ // do some setup
+ if (argv.length < 1) {
+ System.err.println("usage: TestSetFilter [-v|-p|-f|-e|-l|-n N|-g grammar] test_set1 [test_set2 ...]");
+ System.err.println(" -g grammar file (can also be on STDIN)");
+ System.err.println(" -v verbose output");
+ System.err.println(" -p parallel compatibility");
+ System.err.println(" -f fast mode (default)");
+ System.err.println(" -e exact mode (slower)");
+ System.err.println(" -l loose mode");
+ System.err.println(" -n max n-gram to compare to (default 12)");
+ return;
+ }
+
+ String grammarFile = null;
+
+ TestSetFilter filter = new TestSetFilter();
+
+ for (int i = 0; i < argv.length; i++) {
+ if (argv[i].equals("-v")) {
+ filter.setVerbose(true);
+ continue;
+ } else if (argv[i].equals("-p")) {
+ filter.setParallel(true);
+ continue;
+ } else if (argv[i].equals("-g")) {
+ grammarFile = argv[++i];
+ continue;
+ } else if (argv[i].equals("-f")) {
+ filter.setFilter("fast");
+ continue;
+ } else if (argv[i].equals("-e")) {
+ filter.setFilter("exact");
+ continue;
+ } else if (argv[i].equals("-l")) {
+ filter.setFilter("loose");
+ continue;
+ } else if (argv[i].equals("-n")) {
+ filter.setRuleLength(Integer.parseInt(argv[i + 1]));
+ i++;
+ continue;
+ }
+
+ filter.loadTestSentences(argv[i]);
+ }
+
+ int rulesIn = 0;
+ int rulesOut = 0;
+ if (filter.verbose) {
+ System.err.println(String.format("Filtering rules with the %s filter...", filter.getFilterName()));
+// System.err.println("Using at max " + filter.RULE_LENGTH + " n-grams...");
+ }
+ LineReader reader = (grammarFile != null)
+ ? new LineReader(grammarFile, filter.verbose)
+ : new LineReader(System.in);
+ for (String rule: reader) {
+ rulesIn++;
+
+ String[] parts = P_DELIM.split(rule);
+ if (parts.length >= 4) {
+ // the source is the second field for thrax grammars, first field for phrasal ones
+ String source = rule.startsWith("[") ? parts[1].trim() : parts[0].trim();
+ if (filter.inTestSet(source)) {
+ System.out.println(rule);
+ if (filter.parallel)
+ System.out.flush();
+ rulesOut++;
+ } else if (filter.parallel) {
+ System.out.println("");
+ System.out.flush();
+ }
+ }
+ }
+ if (filter.verbose) {
+ System.err.println("[INFO] Total rules read: " + rulesIn);
+ System.err.println("[INFO] Rules kept: " + rulesOut);
+ System.err.println("[INFO] Rules dropped: " + (rulesIn - rulesOut));
+ System.err.println("[INFO] cached queries: " + filter.cached);
+ }
+
+ return;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/Orientation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/Orientation.java b/src/main/java/org/apache/joshua/ui/Orientation.java
new file mode 100644
index 0000000..ec7b523
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/Orientation.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui;
+
+public enum Orientation {
+ HORIZONTAL, VERTICAL
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/StartupWindow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/StartupWindow.java b/src/main/java/org/apache/joshua/ui/StartupWindow.java
new file mode 100644
index 0000000..6fc37a2
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/StartupWindow.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.Font;
+import java.awt.GraphicsEnvironment;
+import java.awt.Image;
+import java.awt.Point;
+
+import javax.swing.BorderFactory;
+import javax.swing.ImageIcon;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JWindow;
+
+/**
+ * Startup window for Joshua programs.
+ *
+ * @author Lane Schwartz
+ * @author Aaron Phillips
+ */
+public class StartupWindow extends JWindow {
+
+ /** Serialization identifier. */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Constructs a splash screen.
+ *
+ * @param title Title to be displayed
+ */
+ public StartupWindow(String title) {
+ this(title, "Joshua Developers", "2010", Color.BLACK, 5);
+ }
+
+ public StartupWindow(String title, String author, String year, Image image, Color borderColor,
+ int borderWidth) {
+ JPanel content = (JPanel) getContentPane();
+ content.setBackground(Color.WHITE);
+
+ int width = 250;
+ int height = 100;
+
+ Point center = GraphicsEnvironment.getLocalGraphicsEnvironment().getCenterPoint();
+ setBounds(center.x - width / 2, center.y - height / 2, width, height);
+
+ JLabel titleLabel = new JLabel(title, JLabel.CENTER);
+ titleLabel.setFont(new Font("Sans-Serif", Font.BOLD, 24));
+ content.add(titleLabel, BorderLayout.NORTH);
+
+ JLabel copyright = new JLabel("\u24D2 " + year + " - " + author, JLabel.CENTER);
+ copyright.setFont(new Font("Sans-Serif", Font.PLAIN, 8));
+ content.add(copyright, BorderLayout.SOUTH);
+
+ if (image != null) {
+ content.add(new JLabel(new ImageIcon(image)));
+ }
+
+ content.setBorder(BorderFactory.createLineBorder(borderColor, borderWidth));
+
+ // Display it
+ setVisible(true);
+ }
+
+ public StartupWindow(String title, String author, String year, Color borderColor, int borderWidth) {
+ this(title, author, year, null, borderColor, borderWidth);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/package.html b/src/main/java/org/apache/joshua/ui/package.html
new file mode 100644
index 0000000..2dcc44e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/package.html
@@ -0,0 +1,25 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head></head>
+<body bgcolor="white">
+
+<!--
+##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
+##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
+##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
+-->
+
+Provides classes for visualizing parts of the translation process.
+
+<!--
+<h2>Related Documentation</h2>
+
+<ul>
+ <li>Much of the code in this package is based on .....
+</ul>
+-->
+
+<!-- Put @see and @since tags down here. -->
+
+</body>
+</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTree.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTree.java
new file mode 100644
index 0000000..86b9618
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTree.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Collections;
+
+import joshua.ui.tree_visualizer.tree.Tree;
+
+import edu.uci.ics.jung.graph.DirectedOrderedSparseMultigraph;
+import edu.uci.ics.jung.graph.util.EdgeType;
+import edu.uci.ics.jung.graph.util.Pair;
+
+public class DerivationTree extends DirectedOrderedSparseMultigraph<Node, DerivationTreeEdge> {
+ /**
+ * Eclipse thinks this is necessary.
+ */
+ private static final long serialVersionUID = 2914449263979566324L;
+
+ public final Node root;
+ public final Node sourceRoot;
+
+ public DerivationTree(Tree t, String source) {
+ final Tree.Node treeRoot = t.root();
+ final String rootLabel = treeRoot.label();
+ root = new Node(rootLabel, false);
+ sourceRoot = new Node(rootLabel, true);
+ addVertex(root);
+ addVertex(sourceRoot);
+ addSubtreeRootedAt(root, treeRoot);
+ final String[] sourceWords = source.split("\\s+");
+ addSourceSubtreeRootedAt(sourceRoot, treeRoot, 0, sourceWords.length, sourceWords);
+ }
+
+ private void addSubtreeRootedAt(Node n, Tree.Node tn) {
+ for (Tree.Node child : tn.children()) {
+ Node childNode = new Node(child.label(), false);
+ addVertex(childNode);
+ addEdge(new DerivationTreeEdge(false), new Pair<Node>(n, childNode), EdgeType.DIRECTED);
+ addSubtreeRootedAt(childNode, child);
+ }
+ }
+
+ private void addSourceSubtreeRootedAt(Node n, Tree.Node tn, int firstIndex, int lastIndex,
+ String[] sourceWords) {
+ int nextUncoveredIndex = firstIndex;
+ Tree.NodeSourceStartComparator cmp = new Tree.NodeSourceStartComparator();
+ List<Tree.Node> children = tn.children();
+ Collections.sort(children, cmp);
+ for (Tree.Node child : children) {
+ if (child.isLeaf()) {
+ continue;
+ }
+ int sourceStartIndex = child.sourceStartIndex();
+ int sourceEndIndex = child.sourceEndIndex();
+ if (sourceStartIndex > nextUncoveredIndex) {
+ insertSourceLeaf(n, sourceWords, nextUncoveredIndex, sourceStartIndex);
+ }
+ Node childNode = new Node(child.label(), true);
+ addEdge(new DerivationTreeEdge(true), new Pair<Node>(n, childNode), EdgeType.DIRECTED);
+ nextUncoveredIndex = sourceEndIndex;
+ addSourceSubtreeRootedAt(childNode, child, sourceStartIndex, sourceEndIndex, sourceWords);
+ }
+ if (nextUncoveredIndex < lastIndex) {
+ insertSourceLeaf(n, sourceWords, nextUncoveredIndex, lastIndex);
+ }
+ }
+
+ private void insertSourceLeaf(Node n, String[] words, int start, int end) {
+ final String[] leafWords = Arrays.copyOfRange(words, start, end);
+ String label = leafWords[0];
+ for (int i = 1; i < leafWords.length; i++) {
+ label += " " + leafWords[i];
+ }
+ Node childNode = new Node(label, true);
+ addEdge(new DerivationTreeEdge(true), new Pair<Node>(n, childNode), EdgeType.DIRECTED);
+ }
+
+ public void setSubtreeHighlight(Node n, boolean b) {
+ n.isHighlighted = b;
+ for (Node s : getSuccessors(n)) {
+ setSubtreeHighlight(s, b);
+ }
+ return;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeEdge.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeEdge.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeEdge.java
new file mode 100644
index 0000000..b457f95
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeEdge.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+public class DerivationTreeEdge {
+ public final boolean pointsToSource;
+
+ public DerivationTreeEdge(boolean pts) {
+ pointsToSource = pts;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeTransformer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeTransformer.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeTransformer.java
new file mode 100644
index 0000000..9bdeefe
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationTreeTransformer.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+import java.awt.Dimension;
+import java.awt.geom.Point2D;
+
+import org.apache.commons.collections15.Transformer;
+
+import edu.uci.ics.jung.algorithms.layout.TreeLayout;
+import edu.uci.ics.jung.graph.DelegateForest;
+
+public class DerivationTreeTransformer implements Transformer<Node, Point2D> {
+ private TreeLayout<Node, DerivationTreeEdge> treeLayout;
+ private DerivationTree graph;
+ private Node root;
+ private Node sourceRoot;
+
+ private boolean isAnchored;
+ private Point2D anchorPoint;
+
+ private double Y_DIST;
+ private double X_DIST;
+
+
+ public DerivationTreeTransformer(DerivationTree t, Dimension d, boolean isAnchored) {
+ this.isAnchored = isAnchored;
+ anchorPoint = new Point2D.Double(0, 0);
+ graph = t;
+ DelegateForest<Node, DerivationTreeEdge> del = new DelegateForest<Node, DerivationTreeEdge>(t);
+ del.setRoot(t.root);
+ del.setRoot(t.sourceRoot);
+ root = t.root;
+ sourceRoot = t.sourceRoot;
+ Y_DIST = d.getHeight() / (2 * (1 + distanceToLeaf(root)));
+ int leafCount = 0;
+ for (Node n : t.getVertices()) {
+ if (t.outDegree(n) == 0) leafCount++;
+ }
+ X_DIST = d.getWidth() / leafCount;
+
+ treeLayout = new TreeLayout<Node, DerivationTreeEdge>(del, (int) Math.round(X_DIST));
+ }
+
+ public Point2D transform(Node n) {
+ double x, y;
+ Point2D t = treeLayout.transform(n);
+ if (n.isSource) {
+ x =
+ /* treeLayout.transform(root).getX() + */(t.getX()
+ - treeLayout.transform(sourceRoot).getX() + treeLayout.transform(root).getX());
+ y = Y_DIST * (distanceToLeaf(n) + 1);
+ } else {
+ x = t.getX();
+ y = Y_DIST * (-1) * distanceToLeaf(n);
+ }
+ if (isAnchored) {
+ x += anchorPoint.getX();
+ y += anchorPoint.getY();
+ }
+ return new Point2D.Double(x, y + Y_DIST * (1 + distanceToLeaf(root)));
+ }
+
+ private int distanceToLeaf(Node n) {
+ if (graph.getSuccessors(n).isEmpty()) return 0;
+ int result = 0;
+ for (Object x : graph.getSuccessors(n)) {
+ int tmp = distanceToLeaf((Node) x);
+ if (tmp > result) result = tmp;
+ }
+ return 1 + result;
+ }
+
+ public Dimension getSize() {
+ int height = (int) Math.round(2 * Y_DIST * (1 + distanceToLeaf(root)));
+ int width = (int) Math.round(2 * treeLayout.transform(root).getX());
+ Dimension ret = new Dimension(width, height);
+ return ret;
+ }
+
+ public Point2D getAnchorPosition(DerivationViewer.AnchorType type) {
+ switch (type) {
+ case ANCHOR_ROOT:
+ return transform(root);
+ case ANCHOR_LEFTMOST_LEAF:
+ Node n = root;
+ while (graph.getSuccessorCount(n) != 0)
+ n = (Node) graph.getSuccessors(n).toArray()[0];
+ return transform(n);
+ default:
+ return new Point2D.Double(0, 0);
+ }
+ }
+
+ public void setAnchorPoint(DerivationViewer.AnchorType type, Point2D viewerAnchor) {
+ Point2D oldAnchor = getAnchorPosition(type);
+ double x = viewerAnchor.getX() - oldAnchor.getX();
+ double y = viewerAnchor.getY() - oldAnchor.getY();
+ anchorPoint = new Point2D.Double(x, y);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewer.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewer.java
new file mode 100644
index 0000000..cc8a701
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewer.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+import java.awt.BasicStroke;
+import java.awt.Color;
+import java.awt.Dimension;
+import java.awt.Paint;
+import java.awt.Shape;
+import java.awt.Stroke;
+import java.awt.geom.Point2D;
+import java.awt.geom.Rectangle2D;
+
+import javax.swing.JLabel;
+
+import org.apache.commons.collections15.Transformer;
+
+import edu.uci.ics.jung.algorithms.layout.CircleLayout;
+import edu.uci.ics.jung.algorithms.layout.StaticLayout;
+import edu.uci.ics.jung.visualization.VisualizationViewer;
+import edu.uci.ics.jung.visualization.control.DefaultModalGraphMouse;
+import edu.uci.ics.jung.visualization.control.LayoutScalingControl;
+import edu.uci.ics.jung.visualization.control.ModalGraphMouse;
+import edu.uci.ics.jung.visualization.decorators.ToStringLabeller;
+import edu.uci.ics.jung.visualization.renderers.Renderer.VertexLabel.Position;
+
+@SuppressWarnings("serial")
+public class DerivationViewer extends VisualizationViewer<Node, DerivationTreeEdge> {
+ public static final int DEFAULT_HEIGHT = 500;
+ public static final int DEFAULT_WIDTH = 500;
+ public static final Color SRC = Color.WHITE;
+ private Color TGT;
+
+ public static final Color HIGHLIGHT = Color.pink;
+
+ public static enum AnchorType {
+ ANCHOR_ROOT, ANCHOR_LEFTMOST_LEAF
+ };
+
+ private AnchorType anchorStyle;
+ private Point2D anchorPoint;
+
+ public DerivationViewer(DerivationTree g, Dimension d, Color targetColor, AnchorType anchor) {
+ super(new CircleLayout<Node, DerivationTreeEdge>(g));
+ anchorStyle = anchor;
+ DerivationTreeTransformer dtt = new DerivationTreeTransformer(g, d, false);
+ StaticLayout<Node, DerivationTreeEdge> derivationLayout =
+ new StaticLayout<Node, DerivationTreeEdge>(g, dtt);
+ // derivationLayout.setSize(dtt.getSize());
+ setGraphLayout(derivationLayout);
+ scaleToLayout(new LayoutScalingControl());
+ // g.addCorrespondences();
+ setPreferredSize(new Dimension(DEFAULT_HEIGHT, DEFAULT_WIDTH));
+ getRenderContext().setVertexLabelTransformer(new ToStringLabeller<Node>());
+
+ DefaultModalGraphMouse<Node, DerivationTreeEdge> graphMouse =
+ new DefaultModalGraphMouse<Node, DerivationTreeEdge>();
+ graphMouse.setMode(ModalGraphMouse.Mode.TRANSFORMING);
+ setGraphMouse(graphMouse);
+ addKeyListener(graphMouse.getModeKeyListener());
+ // this.setPickedVertexState(new DerivationTreePickedState(g));
+
+ getRenderContext().setVertexFillPaintTransformer(vp);
+ getRenderContext().setEdgeStrokeTransformer(es);
+ getRenderContext().setVertexShapeTransformer(ns);
+ getRenderer().getVertexLabelRenderer().setPosition(Position.CNTR);
+
+ TGT = targetColor;
+ anchorPoint = dtt.getAnchorPosition(anchorStyle);
+ }
+
+ public void setGraph(DerivationTree tree) {
+ DerivationTreeTransformer dtt = new DerivationTreeTransformer(tree, getSize(), true);
+ dtt.setAnchorPoint(anchorStyle, anchorPoint);
+ setGraphLayout(new StaticLayout<Node, DerivationTreeEdge>(tree, dtt));
+ }
+
+ private Transformer<Node, Paint> vp = new Transformer<Node, Paint>() {
+ public Paint transform(Node n) {
+ if (n.isHighlighted) return HIGHLIGHT;
+ if (n.isSource)
+ return SRC;
+ else
+ return TGT;
+ }
+ };
+
+ private static Transformer<DerivationTreeEdge, Stroke> es =
+ new Transformer<DerivationTreeEdge, Stroke>() {
+ public Stroke transform(DerivationTreeEdge e) {
+ if (e.pointsToSource) {
+ return new BasicStroke(1.0f,
+ BasicStroke.CAP_BUTT,
+ BasicStroke.JOIN_MITER,
+ 10.0f,
+ new float[] {10.0f},
+ 0.0f);
+ } else {
+ return new BasicStroke(1.0f);
+ }
+ }
+ };
+
+ private static Transformer<Node, Shape> ns = new Transformer<Node, Shape>() {
+ public Shape transform(Node n) {
+ JLabel x = new JLabel();
+ double len = x.getFontMetrics(x.getFont()).stringWidth(n.toString());
+ double margin = 5.0;
+ return new Rectangle2D.Double((len + margin) / (-2), 0, len + 2 * margin, 20);
+ }
+ };
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewerApplet.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewerApplet.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewerApplet.java
new file mode 100644
index 0000000..7904e8e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/DerivationViewerApplet.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+import java.awt.Color;
+
+import javax.swing.JApplet;
+
+import joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * An applet for viewing DerivationTrees. It consists of a DerivationViewer inside of the applet's
+ * Panel.
+ *
+ * @author Jonathan Weese
+ *
+ */
+@SuppressWarnings("serial")
+public class DerivationViewerApplet extends JApplet {
+ /**
+ * Initializes the applet by getting the source sentence and the tree representation from the
+ * applet tag in a web page.
+ */
+ public void init() {
+ String source = getParameter("sourceSentence");
+ String derivation = getParameter("derivationTree");
+ Tree tree = new Tree(derivation);
+
+ add(new DerivationViewer(new DerivationTree(tree, source),
+ getSize(),
+ Color.red,
+ DerivationViewer.AnchorType.ANCHOR_ROOT));
+ return;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/Node.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/Node.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/Node.java
new file mode 100644
index 0000000..846fc71
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/Node.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer;
+
+/**
+ * A representation of a node in a derivation tree. The derivation tree class itself is
+ * parameterized in terms of this class and the <code>DerivationEdge</code> class. A
+ * <code>Node</code> may represent either a non-terminal symbol or one or more terminal symbols of
+ * the derivation.
+ */
+public class Node {
+ /**
+ * The label to be shown on the node. If the node is a non-terminal symbol, it is the name of the
+ * symbol. Otherwise, it is terminal symbols joined with spaces.
+ */
+ public final String label;
+
+ /**
+ * Indicates whether this node is part of the source-side of target- side derivation tree.
+ */
+ public final boolean isSource;
+
+ /**
+ * A boolean to let the renderer know whether this vertex is highlighted.
+ */
+ public boolean isHighlighted = false;
+
+ /**
+ * Constructor used for root nodes or nodes whose parent is not given.
+ *
+ * @param label a <code>String</code> that represents the symbols at this node
+ * @param isSource a boolean saying whether this is a source-side node
+ */
+ public Node(String label, boolean isSource) {
+ this.label = label;
+ this.isSource = isSource;
+ }
+
+ @Override
+ public String toString() {
+ return label;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
new file mode 100644
index 0000000..bd5b592
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/Browser.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer.browser;
+
+import joshua.ui.tree_visualizer.tree.Tree;
+import joshua.util.io.LineReader;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Scanner;
+
+import javax.swing.DefaultListModel;
+import javax.swing.JFrame;
+import javax.swing.JList;
+import javax.swing.JScrollPane;
+import javax.swing.JTextField;
+import javax.swing.ListSelectionModel;
+import javax.swing.event.ListSelectionEvent;
+import javax.swing.event.ListSelectionListener;
+import javax.swing.event.DocumentListener;
+import javax.swing.event.DocumentEvent;
+
+public class Browser {
+
+ /**
+ * A list that contains the one best translation of each source sentence.
+ */
+ private static JList oneBestList;
+
+ private static JTextField searchBox;
+
+ /**
+ * The current frame that displays a derivation tree.
+ */
+ private static List<DerivationTreeFrame> activeFrame;
+
+ private static List<TranslationInfo> translations;
+ /**
+ * Default width of the chooser frame.
+ */
+ private static final int DEFAULT_WIDTH = 640;
+
+ /**
+ * Default height of the chooser frame.
+ */
+ private static final int DEFAULT_HEIGHT = 480;
+
+ /**
+ * List of colors to be used in derivation trees
+ */
+ static final Color[] dataSetColors = { Color.red, Color.orange, Color.blue, Color.green };
+
+ /**
+ * @param args the paths to the source, reference, and n-best files
+ */
+ public static void main(String[] argv) throws IOException {
+ String sourcePath = argv.length > 0 ? argv[0] : null;
+ String referencePath = argv.length > 1 ? argv[1] : null;
+ String[] translationPaths = new String[0];
+ if (argv.length > 2) {
+ translationPaths = Arrays.copyOfRange(argv, 2, argv.length);
+ }
+ translations = new ArrayList<TranslationInfo>();
+ readSourcesFromPath(sourcePath);
+ readReferencesFromPath(referencePath);
+ for (String tp : translationPaths) {
+ readTranslationsFromPath(tp);
+ }
+ initializeChooserFrame();
+ return;
+ }
+
+ private static void readSourcesFromPath(String path) throws IOException {
+ for (String line: new LineReader(path)) {
+ TranslationInfo ti = new TranslationInfo();
+ ti.setSourceSentence("<s> " + line + " </s>");
+ translations.add(ti);
+ }
+ }
+
+ private static void readReferencesFromPath(String path) throws IOException {
+ Scanner scanner = new Scanner(new File(path), "UTF-8");
+ for (TranslationInfo ti : translations) {
+ if (scanner.hasNextLine()) {
+ ti.setReference(scanner.nextLine());
+ }
+ }
+ scanner.close();
+ }
+
+ private static void readTranslationsFromPath(String path) throws IOException {
+ Scanner scanner = new Scanner(new File(path), "UTF-8");
+ String sentenceIndex = null;
+ for (TranslationInfo ti : translations) {
+ while (scanner.hasNextLine()) {
+ final String[] fields = scanner.nextLine().split("\\|\\|\\|");
+ final String index = fields[0];
+ final String tree = fields[1].trim();
+ if (!index.equals(sentenceIndex)) {
+ sentenceIndex = index;
+ ti.translations().add(new Tree(tree));
+ break;
+ }
+ }
+ }
+ scanner.close();
+ }
+
+ /**
+ * Initializes the various JComponents in the chooser frame.
+ */
+ private static void initializeChooserFrame() {
+ JFrame chooserFrame = new JFrame("Joshua Derivation Tree Browser");
+ chooserFrame.setLayout(new BorderLayout());
+
+ /*
+ * JMenuBar mb = new JMenuBar(); JMenu openMenu = new JMenu("Control"); JMenuItem src = new
+ * JMenuItem("Open source file ..."); JMenuItem ref = new JMenuItem("Open reference file ...");
+ * JMenuItem tgt = new JMenuItem("Open n-best derivations file ..."); JMenuItem quit = new
+ * JMenuItem("Quit");
+ *
+ * new FileChoiceListener(chooserFrame, src, ref, tgt);
+ *
+ * quit.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) {
+ * System.exit(0); } }); openMenu.add(src); openMenu.add(ref); openMenu.add(tgt);
+ * openMenu.add(quit); mb.add(openMenu); chooserFrame.setJMenuBar(mb);
+ */
+
+ searchBox = new JTextField("search");
+ searchBox.getDocument().addDocumentListener(new SearchListener());
+ searchBox.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ final int selectedIndex = oneBestList.getSelectedIndex();
+ Browser.search(selectedIndex < 0 ? 0 : selectedIndex + 1);
+ }
+ });
+ oneBestList = new JList(new DefaultListModel());
+ oneBestList.setFixedCellWidth(200);
+ oneBestList.setSelectionMode(ListSelectionModel.SINGLE_SELECTION);
+ // oneBestList.setCellRenderer(new DerivationBrowserListCellRenderer());
+
+ oneBestList.addListSelectionListener(new ListSelectionListener() {
+ public void valueChanged(ListSelectionEvent e) {
+ for (DerivationTreeFrame frame : activeFrame) {
+ frame.drawGraph(translations.get(oneBestList.getSelectedIndex()));
+ }
+ return;
+ }
+ });
+ chooserFrame.getContentPane().add(searchBox, BorderLayout.NORTH);
+ chooserFrame.getContentPane().add(new JScrollPane(oneBestList), BorderLayout.CENTER);
+
+ refreshLists();
+ chooserFrame.setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+ chooserFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+
+ activeFrame = new ArrayList<DerivationTreeFrame>();
+ int numNBestFiles = translations.get(0).translations().size();
+ for (int i = 0; i < numNBestFiles; i++)
+ activeFrame.add(new DerivationTreeFrame(i, oneBestList));
+ chooserFrame.setVisible(true);
+ return;
+ }
+
+ /**
+ * Removes and re-adds the appropriate values to the reference and one-best lists.
+ */
+ private static void refreshLists() {
+ oneBestList.removeAll();
+ DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+ for (TranslationInfo ti : translations) {
+ oneBestListModel.addElement(ti.reference());
+ }
+ return;
+ }
+
+ private static void search(int fromIndex) {
+ final String query = searchBox.getText();
+ DefaultListModel oneBestListModel = (DefaultListModel) oneBestList.getModel();
+ for (int i = fromIndex; i < oneBestListModel.getSize(); i++) {
+ String reference = (String) oneBestListModel.getElementAt(i);
+ if (reference.indexOf(query) != -1) {
+ // found the query
+ oneBestList.setSelectedIndex(i);
+ oneBestList.ensureIndexIsVisible(i);
+ searchBox.setBackground(Color.white);
+ return;
+ }
+ }
+ searchBox.setBackground(Color.red);
+ }
+
+ private static class SearchListener implements DocumentListener {
+
+ public void insertUpdate(DocumentEvent e) {
+ final int selectedIndex = oneBestList.getSelectedIndex();
+ Browser.search(selectedIndex < 0 ? 0 : selectedIndex);
+ }
+
+ public void removeUpdate(DocumentEvent e) {
+ final String query = searchBox.getText();
+ if (query.equals("")) {
+ return;
+ } else {
+ insertUpdate(e);
+ }
+ }
+
+ public void changedUpdate(DocumentEvent e) {
+
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
new file mode 100644
index 0000000..a08b370
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/DerivationTreeFrame.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer.browser;
+
+import java.awt.BorderLayout;
+import java.awt.Color;
+import java.awt.GridLayout;
+import java.awt.event.ActionEvent;
+import java.awt.event.ActionListener;
+
+import javax.swing.JButton;
+import javax.swing.JFrame;
+import javax.swing.JLabel;
+import javax.swing.JPanel;
+import javax.swing.JList;
+
+import joshua.ui.tree_visualizer.DerivationTree;
+import joshua.ui.tree_visualizer.DerivationViewer;
+import joshua.ui.tree_visualizer.tree.Tree;
+
+/**
+ * A frame that displays a derivation tree.
+ *
+ * @author jonny
+ *
+ */
+class DerivationTreeFrame extends JFrame {
+ /**
+ * Eclipse seems to think serialVersionUID is important. I don't know why.
+ */
+ private static final long serialVersionUID = -3173826443907629130L;
+
+ /**
+ * A button to move to the next source-side sentence in the file.
+ */
+ JButton nextSource;
+ /**
+ * A button to move to the previous source-side sentence in the file.
+ */
+ JButton previousSource;
+
+ /**
+ * A button to show or hide extra information about the derivation.
+ */
+ private JButton informationButton;
+
+ /**
+ * A panel holding the extra information about the derivation.
+ */
+ private JPanel informationPanel;
+
+ /**
+ * A label holding the current source sentence.
+ */
+ private JLabel sourceLabel;
+
+ /**
+ * A label holding the reference translation of the current source sentence.
+ */
+ private JLabel referenceLabel;
+
+ /**
+ * A label holding the one-best translation of the current source sentence.
+ */
+ private JLabel oneBestLabel;
+
+ /**
+ * A panel that holds the buttons, as well as labels to show which derivation
+ * is currently being displayed.
+ */
+ private JPanel controlPanel;
+ /**
+ * A panel used to display the derivation tree itself.
+ */
+ private JPanel viewPanel;
+
+ /**
+ * This component displays the derivation tree's JUNG graph.
+ */
+ private DerivationViewer dv;
+
+ /**
+ * Index to determine which data set (which n-best file) this frame brings its
+ * graphs from.
+ */
+ private final int dataSetIndex;
+
+ private static final int DEFAULT_WIDTH = 640;
+ private static final int DEFAULT_HEIGHT = 480;
+
+ /**
+ * Color to use to render target-side trees.
+ */
+ private Color targetColor;
+
+ private JList mainList;
+
+ /**
+ * The default constructor.
+ */
+ public DerivationTreeFrame(int index, JList mainList) {
+ super("Joshua Derivation Tree");
+ this.mainList = mainList;
+ setLayout(new BorderLayout());
+ setSize(DEFAULT_WIDTH, DEFAULT_HEIGHT);
+ controlPanel = new JPanel(new BorderLayout());
+ informationPanel = new JPanel(new GridLayout(3, 1));
+
+ sourceLabel = new JLabel("source sentence");
+ referenceLabel = new JLabel("reference translation");
+ oneBestLabel = new JLabel("one best translation");
+
+ informationPanel.add(sourceLabel);
+ informationPanel.add(referenceLabel);
+ informationPanel.add(oneBestLabel);
+ informationPanel.setVisible(false);
+
+ controlPanel.add(informationPanel, BorderLayout.SOUTH);
+
+ initializeButtons();
+ layoutControl();
+
+ viewPanel = new JPanel(new BorderLayout());
+ dv = null;
+
+ dataSetIndex = index;
+ targetColor = Browser.dataSetColors[dataSetIndex % Browser.dataSetColors.length];
+
+ getContentPane().add(viewPanel, BorderLayout.CENTER);
+ getContentPane().add(controlPanel, BorderLayout.SOUTH);
+ // drawGraph();
+ setVisible(true);
+ }
+
+ /**
+ * Lays out the control buttons of this frame.
+ */
+ private void layoutControl() {
+ /*
+ * JPanel ctlLeft = new JPanel(new GridLayout(2, 1)); JPanel ctlCenter = new
+ * JPanel(new GridLayout(2, 1)); JPanel ctlRight = new JPanel(new
+ * GridLayout(2, 1));
+ *
+ * controlPanel.add(ctlLeft, BorderLayout.WEST); controlPanel.add(ctlCenter,
+ * BorderLayout.CENTER); controlPanel.add(ctlRight, BorderLayout.EAST);
+ *
+ * ctlLeft.add(previousSource); ctlRight.add(nextSource);
+ */
+
+ controlPanel.add(previousSource, BorderLayout.WEST);
+ controlPanel.add(nextSource, BorderLayout.EAST);
+ controlPanel.add(informationButton, BorderLayout.CENTER);
+ return;
+ }
+
+ /**
+ * Initializes the control buttons of this frame.
+ */
+ private void initializeButtons() {
+ nextSource = new JButton(">");
+ previousSource = new JButton("<");
+ informationButton = new JButton("More Information");
+
+ nextSource.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ int index = mainList.getSelectedIndex();
+ mainList.setSelectedIndex(index + 1);
+ return;
+ }
+ });
+ previousSource.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ int index = mainList.getSelectedIndex();
+ if (index > 0) {
+ mainList.setSelectedIndex(index - 1);
+ }
+ return;
+ }
+ });
+ informationButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent e) {
+ JButton source = (JButton) e.getSource();
+ if (informationPanel.isVisible()) {
+ source.setText("More Information");
+ informationPanel.setVisible(false);
+ } else {
+ source.setText("Less Information");
+ informationPanel.setVisible(true);
+ }
+ return;
+ }
+ });
+ return;
+ }
+
+ /**
+ * Displays the derivation tree for the current candidate translation. The
+ * current candidate translation is whichever translation is currently
+ * highlighted in the Derivation Browser's chooser frame.
+ */
+ public void drawGraph(TranslationInfo ti) {
+ viewPanel.removeAll();
+ String src = ti.sourceSentence();
+ Tree tgt = ti.translations().get(dataSetIndex);
+ String ref = ti.reference();
+
+ sourceLabel.setText(src);
+ referenceLabel.setText(ref);
+ oneBestLabel.setText(tgt.yield());
+
+ DerivationTree tree = new DerivationTree(tgt, src);
+ if (dv == null) {
+ dv = new DerivationViewer(tree, viewPanel.getSize(), targetColor,
+ DerivationViewer.AnchorType.ANCHOR_LEFTMOST_LEAF);
+ } else {
+ dv.setGraph(tree);
+ }
+ viewPanel.add(dv, BorderLayout.CENTER);
+ dv.revalidate();
+ repaint();
+ getContentPane().repaint();
+ return;
+ }
+
+ /**
+ * Makes this frame unmodifiable, so that the tree it displays cannot be
+ * changed. In fact, all that happens is the title is update and the
+ * navigation buttons are disabled. This method is intended to prevent the
+ * user from modifying the frame, not to prevent other code from modifying it.
+ */
+ public void disableNavigationButtons() {
+ setTitle(getTitle() + " (fixed)");
+ nextSource.setEnabled(false);
+ previousSource.setEnabled(false);
+ return;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
new file mode 100644
index 0000000..8fde26f
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/browser/TranslationInfo.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer.browser;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import joshua.ui.tree_visualizer.tree.Tree;
+
+class TranslationInfo {
+ private String sourceSentence;
+ private String reference;
+ private ArrayList<Tree> translations;
+
+ public TranslationInfo() {
+ translations = new ArrayList<Tree>();
+ }
+
+ public String sourceSentence() {
+ return sourceSentence;
+ }
+
+ public void setSourceSentence(String src) {
+ sourceSentence = src;
+ return;
+ }
+
+ public String reference() {
+ return reference;
+ }
+
+ public void setReference(String ref) {
+ reference = ref;
+ return;
+ }
+
+ public List<Tree> translations() {
+ return translations;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java b/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
new file mode 100644
index 0000000..409e30a
--- /dev/null
+++ b/src/main/java/org/apache/joshua/ui/tree_visualizer/tree/Tree.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.ui.tree_visualizer.tree;
+
+import java.util.Stack;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Comparator;
+
+/**
+ * A class to represent the target-side tree produced by decoding using Joshua
+ * with an SCFG.
+ * <p>
+ * When decoding with use_tree_nbest=true, instead of a flat text output like
+ * "i asked her a question", we get a Penn treebank format tree like
+ * "(ROOT (S (NP i) (VP (V asked) (NP her) (NP (DT a) (N question)))))".
+ * If we also set include_align_index=true, we include source-side alignments
+ * for each internal node of the tree.
+ * <p>
+ * So, if the source input sentence is "je lui ai pose un question", if we
+ * turn on both configuration options, we end up with a decorated tree like
+ * this:
+ * "(ROOT{0-6} (S{0-6} (NP{0-1} i) (VP{1-6} (V{2-4} asked) (NP{1-2} her)
+ * (NP{4-6} (DT{4-5} a) (N{5-6} question)))))".
+ * <p>
+ * This class contains all the information of that flat string representation:
+ * the tree structure, the output (English) words, and the alignments to a
+ * source sentence.
+ * <p>
+ * Using a Tree the source sentence it was aligned to, we can create
+ * a DerivationTree object suitable for display.
+ *
+ * @author Jonny Weese <jo...@cs.jhu.edu>
+ */
+public class Tree {
+
+ /**
+ * An array holding the label of each node of the tree, in depth-first order.
+ * The label of a node means the NT label assigned to an internal node, or
+ * the terminal symbol (English word) at a leaf.
+ */
+ private final String [] labels;
+
+ /**
+ * The number of children of each node of the tree, in depth-first order.
+ */
+ private final int [] numChildren;
+
+ /**
+ * The smallest source-side index that each node covers, in depth-first order.
+ * Note that we only have this information for internal nodes. For leaves,
+ * this value will always be -1.
+ */
+ private final int [] sourceStartIndices;
+
+ /**
+ * 1 + the largest source-side index that each node covers, in depth-first
+ * order. Note that we only have this informaion for internal nodes. For
+ * leaves, this value will always be -1.
+ */
+ private final int [] sourceEndIndices;
+
+ /**
+ * A pattern to match an aligned internal node and pull out its information.
+ * This pattern matches:
+ *
+ * 1) start-of-string
+ * 2) (
+ * 3) an arbitrary sequence of non-whitespace characters (at least 1)
+ * 4) {
+ * 5) a decimal number
+ * 6) -
+ * 7) a decimal number
+ * 8) }
+ * 9) end-of-string
+ *
+ * That is, it matches something like "(FOO{32-55}". The string and two
+ * decimal numbers (parts 3, 5, and 7) are captured in groups.
+ */
+ private static final Pattern NONTERMINAL_PATTERN =
+ Pattern.compile("^\\((\\S+)\\{(\\d+)-(\\d+)\\}$");
+
+ /**
+ * Creates a Tree object from an input string in Penn treebank format with
+ * source alignment annotations.
+ */
+ public Tree(String s) {
+ final String [] tokens = s.replaceAll("\\)", " )").split("\\s+");
+ int numNodes = 0;
+ for (String t : tokens) {
+ if (!t.equals(")")) {
+ numNodes++;
+ }
+ }
+ labels = new String[numNodes];
+ numChildren = new int[numNodes];
+ sourceStartIndices = new int[numNodes];
+ sourceEndIndices = new int[numNodes];
+ try {
+ initialize(tokens);
+ } catch (Exception e) {
+ // This will catch most formatting errors.
+ throw new IllegalArgumentException(
+ String.format("couldn't create tree from string: \"%s\"", s),
+ e);
+ }
+ }
+
+ private void initialize(String [] tokens) {
+ final Stack<Integer> stack = new Stack<Integer>();
+ int nodeIndex = 0;
+ for (String token : tokens) {
+ final Matcher matcher = NONTERMINAL_PATTERN.matcher(token);
+ if (matcher.matches()) {
+ // new non-terminal node
+ labels[nodeIndex] = matcher.group(1);
+ sourceStartIndices[nodeIndex] = Integer.parseInt(matcher.group(2));
+ sourceEndIndices[nodeIndex] = Integer.parseInt(matcher.group(3));
+ stack.push(nodeIndex);
+ nodeIndex++;
+ } else if (token.equals(")")) {
+ // finished a subtree
+ stack.pop();
+ if (stack.empty()) {
+ break;
+ } else {
+ numChildren[stack.peek()]++;
+ }
+ } else {
+ // otherwise, it's a new leaf node
+ labels[nodeIndex] = token;
+ sourceStartIndices[nodeIndex] = -1;
+ sourceEndIndices[nodeIndex] = -1;
+ numChildren[stack.peek()]++;
+ nodeIndex++;
+ }
+ }
+ if (!stack.empty()) {
+ // Not enough close-parentheses at the end of the tree.
+ throw new IllegalArgumentException();
+ }
+ }
+
+ /**
+ * Return the number of nodes in this Tree.
+ */
+ public int size() {
+ return labels.length;
+ }
+
+ /**
+ * Get the root Node of this Tree.
+ */
+ public Node root() {
+ return new Node(0);
+ }
+
+ private List<Integer> childIndices(int index) {
+ List<Integer> result = new ArrayList<Integer>();
+ int remainingChildren = numChildren[index];
+ int childIndex = index + 1;
+ while (remainingChildren > 0) {
+ result.add(childIndex);
+ childIndex = nextSiblingIndex(childIndex);
+ remainingChildren--;
+ }
+ return result;
+ }
+
+ private int nextSiblingIndex(int index) {
+ int result = index + 1;
+ int remainingChildren = numChildren[index];
+ for (int i = 0; i < remainingChildren; i++) {
+ result = nextSiblingIndex(result);
+ }
+ return result;
+ }
+
+ public String yield() {
+ String result = "";
+ for (int i = 0; i < labels.length; i++) {
+ if (numChildren[i] == 0) {
+ if (!result.equals("")) {
+ result += " ";
+ }
+ result += labels[i];
+ }
+ }
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return root().toString();
+ }
+
+ /**
+ * A class representing the Nodes of a tree.
+ */
+ public class Node {
+
+ /**
+ * The index into the Tree class's internal arrays.
+ */
+ private final int index;
+
+ private Node(int i) {
+ index = i;
+ }
+
+ /**
+ * Get the label for this node. If the node is internal to the tree, its
+ * label is the non-terminal label assigned to it. If it is a leaf node,
+ * the label is the English word at the leaf.
+ */
+ public String label() {
+ return labels[index];
+ }
+
+ public boolean isLeaf() {
+ return numChildren[index] == 0;
+ }
+
+ public int sourceStartIndex() {
+ return sourceStartIndices[index];
+ }
+
+ public int sourceEndIndex() {
+ return sourceEndIndices[index];
+ }
+
+ public List<Node> children() {
+ List<Node> result = new ArrayList<Node>();
+ for (int j : childIndices(index)) {
+ result.add(new Node(j));
+ }
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ if (isLeaf()) {
+ return label();
+ }
+ String result = String.format("(%s{%d-%d}",
+ label(),
+ sourceStartIndex(),
+ sourceEndIndex());
+ for (Node c : children()) {
+ result += String.format(" %s", c);
+ }
+ return result + ")";
+ }
+ }
+
+ public static class NodeSourceStartComparator implements Comparator<Node> {
+ public int compare(Node a, Node b) {
+ return a.sourceStartIndex() - b.sourceStartIndex();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/util/Algorithms.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/util/Algorithms.java b/src/main/java/org/apache/joshua/util/Algorithms.java
new file mode 100644
index 0000000..0f25ee2
--- /dev/null
+++ b/src/main/java/org/apache/joshua/util/Algorithms.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.util;
+
+public final class Algorithms {
+
+ /**
+ * Calculates the Levenshtein Distance for a candidate paraphrase given the source.
+ *
+ * The code is based on the example by Michael Gilleland found at
+ * http://www.merriampark.com/ld.htm.
+ *
+ */
+ public static final int levenshtein(String[] candidate, String[] source) {
+ // First check to see whether either of the arrays
+ // is empty, in which case the least cost is simply
+ // the length of the other array (which would correspond
+ // to inserting that many elements.
+ if (source.length == 0) return candidate.length;
+ if (candidate.length == 0) return source.length;
+
+ // Initialize a table to the minimum edit distances between
+ // any two points in the arrays. The size of the table is set
+ // to be one beyond the lengths of the two arrays, and the first
+ // row and first column are set to be zero to avoid complicated
+ // checks for out of bounds exceptions.
+ int distances[][] = new int[source.length + 1][candidate.length + 1];
+
+ for (int i = 0; i <= source.length; i++)
+ distances[i][0] = i;
+ for (int j = 0; j <= candidate.length; j++)
+ distances[0][j] = j;
+
+ // Walk through each item in the source and target arrays
+ // and find the minimum cost to move from the previous points
+ // to here.
+ for (int i = 1; i <= source.length; i++) {
+ Object sourceItem = source[i - 1];
+ for (int j = 1; j <= candidate.length; j++) {
+ Object targetItem = candidate[j - 1];
+ int cost;
+ if (sourceItem.equals(targetItem))
+ cost = 0;
+ else
+ cost = 1;
+ int deletionCost = distances[i - 1][j] + 1;
+ int insertionCost = distances[i][j - 1] + 1;
+ int substitutionCost = distances[i - 1][j - 1] + cost;
+ distances[i][j] = minimum(insertionCost, deletionCost, substitutionCost);
+ }
+ }
+ // The point at the end will be the minimum edit distance.
+ return distances[source.length][candidate.length];
+ }
+
+ /**
+ * Returns the minimum of the three values.
+ */
+ private static final int minimum(int a, int b, int c) {
+ int minimum;
+ minimum = a;
+ if (b < minimum) minimum = b;
+ if (c < minimum) minimum = c;
+ return minimum;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/util/Bits.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/util/Bits.java b/src/main/java/org/apache/joshua/util/Bits.java
new file mode 100644
index 0000000..2b95a5e
--- /dev/null
+++ b/src/main/java/org/apache/joshua/util/Bits.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.util;
+
+/**
+ * Utility class for bit twiddling.
+ *
+ * @author Lane Schwartz
+ */
+public class Bits {
+
+ /**
+ * Encodes two shorts in an int.
+ *
+ * @param high
+ * @param low
+ * @return
+ */
+ public static int encodeAsInt(short high, short low) {
+
+ // Store the first short value in the highest 16 bits of the int
+ int key = high | 0x00000000;
+ key <<= 16;
+
+ // Store the second short value in the lowest 16 bits of the int
+ int lowInt = low & 0x0000FFFF;
+ key |= lowInt;
+
+ return key;
+
+ }
+
+ /**
+ * Decodes the high 16 bits of an integer as a short.
+ *
+ * @param i Integer value to decode
+ * @return Short representation of the high 16 bits of the integer
+ */
+ public static short decodeHighBits(int i) {
+
+ long key = i & 0xFFFF0000l;
+
+ key >>= 16;
+
+ return (short) key;
+
+ }
+
+
+ /**
+ * Decodes the low 16 bits of an integer as a short.
+ *
+ * @param i Integer value to decode
+ * @return Short representation of the high 16 bits of the integer
+ */
+ public static short decodeLowBits(int i) {
+
+ return (short) i;
+
+ }
+
+
+ /**
+ * Encodes two integers in a long.
+ *
+ * @param high
+ * @param low
+ * @return
+ */
+ public static long encodeAsLong(int high, int low) {
+
+ // Store the first int value in the highest 32 bits of the long
+ long key = high | 0x0000000000000000l;
+ key <<= 32;
+
+ // Store the second int value in the lowest 32 bits of the long
+ long lowLong = low & 0x00000000FFFFFFFFl;;
+ key |= lowLong;
+
+ return key;
+
+ }
+
+ /**
+ * Decodes the high 32 bits of a long as an integer.
+ *
+ * @param l Long value to decode
+ * @return Integer representation of the high 32 bits of the long
+ */
+ public static int decodeHighBits(long l) {
+
+ long key = l & 0xFFFFFFFF00000000l;
+
+ key >>= 32;
+
+ return (int) key;
+
+ }
+
+
+ /**
+ * Decodes the low 32 bits of a long as an integer.
+ *
+ * @param l Long value to decode
+ * @return Integer representation of the high 32 bits of the long
+ */
+ public static int decodeLowBits(long l) {
+
+ return (int) l;
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/main/java/org/apache/joshua/util/BotMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/util/BotMap.java b/src/main/java/org/apache/joshua/util/BotMap.java
new file mode 100644
index 0000000..32dea01
--- /dev/null
+++ b/src/main/java/org/apache/joshua/util/BotMap.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.util;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Gets a special map that maps any key to the a particular value.
+ *
+ * @author Lane Schwartz
+ * @see "Lopez (2008), footnote 9 on p73"
+ */
+public class BotMap<K, V> implements Map<K, V> {
+
+ /** Special value, which this map will return for every key. */
+ private final V value;
+
+ /**
+ * Constructs a special map that maps any key to the a particular value.
+ *
+ * @param value Special value, which this map will return for every key.
+ */
+ public BotMap(V value) {
+ this.value = value;
+ }
+
+ public void clear() {
+ throw new UnsupportedOperationException();
+ }
+
+ public boolean containsKey(Object key) {
+ return true;
+ }
+
+ public boolean containsValue(Object value) {
+ return this.value == value;
+ }
+
+ public Set<Map.Entry<K, V>> entrySet() {
+ throw new UnsupportedOperationException();
+ }
+
+ public V get(Object key) {
+ return value;
+ }
+
+ public boolean isEmpty() {
+ return false;
+ }
+
+ public Set<K> keySet() {
+ throw new UnsupportedOperationException();
+ }
+
+ public V put(K key, V value) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void putAll(Map<? extends K, ? extends V> t) {
+ throw new UnsupportedOperationException();
+ }
+
+ public V remove(Object key) {
+ throw new UnsupportedOperationException();
+ }
+
+ public int size() {
+ throw new UnsupportedOperationException();
+ }
+
+ public Collection<V> values() {
+ return Collections.singleton(value);
+ }
+
+}