You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:25 UTC

[14/60] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/test.sh
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/test.sh b/joshua-core/src/test/java/org/apache/joshua/packed/test.sh
new file mode 100644
index 0000000..be6cf27
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/test.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# test the vocabulary
+# javac VocabTest.java
+# java -cp .:${JOSHUA}/bin VocabTest small_packed

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/AlignmentMapTest.java b/joshua-core/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
new file mode 100644
index 0000000..eba732a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.system;
+
+import static org.junit.Assert.*;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.tm.Rule;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class AlignmentMapTest {
+  // Checks Rule#getAlignmentMap() and Rule#getNonTerminalSourcePositions() with and without an alignment.
+  private Rule rule1 = null; // built with an alignment string in setUp()
+  private Rule rule2 = null; // built with a null alignment in setUp()
+  private Map<Integer, List<Integer>> expectedAlignmentMap = null; // rebuilt per test, hence not static
+  private static final int[] expectedNonTerminalPositions = {2,5}; // indices of -1 and -2 in sourceRhs
+
+  @Before
+  public void setUp() throws Exception {
+    Vocabulary.clear();
+    int[] sourceRhs = {Vocabulary.id("A1"),Vocabulary.id("A2"),-1,Vocabulary.id("B"),Vocabulary.id("C"),-2};
+    int[] targetRhs = {Vocabulary.id("c"),Vocabulary.id("b1"),-1,Vocabulary.id("b2"),-4,Vocabulary.id("a")};
+    int arity = 2; // 2 non terminals
+    String alignment = "0-5 1-5 3-1 3-3 4-0"; // each "a-b" pair is expected below as b -> [a, ...]
+    expectedAlignmentMap = new HashMap<>();
+    expectedAlignmentMap.put(0, Arrays.asList(4));
+    expectedAlignmentMap.put(5, Arrays.asList(0,1));
+    expectedAlignmentMap.put(1, Arrays.asList(3));
+    expectedAlignmentMap.put(3, Arrays.asList(3));
+    rule1 = new Rule(-1, sourceRhs, targetRhs, "", arity, alignment);
+    rule2 = new Rule(-1, sourceRhs, targetRhs, "", arity, null); // rule with no alignment
+  }
+
+  @Test
+  public void test() {
+    // test regular rule with arity 2
+    Map<Integer, List<Integer>> alignmentMap1 = rule1.getAlignmentMap();
+    assertEquals(expectedAlignmentMap, alignmentMap1);
+    int[] nonTerminalPositions1 = rule1.getNonTerminalSourcePositions();
+    assertArrayEquals(expectedNonTerminalPositions, nonTerminalPositions1);
+
+    // test rule with no alignment: the map is empty, the non-terminal positions are the same
+    Map<Integer, List<Integer>> alignmentMap2 = rule2.getAlignmentMap();
+    assertTrue(alignmentMap2.isEmpty());
+    int[] nonTerminalPositions2 = rule2.getNonTerminalSourcePositions();
+    assertArrayEquals(expectedNonTerminalPositions, nonTerminalPositions2);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java b/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
new file mode 100644
index 0000000..1f032d8
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.system;
+
+import static org.apache.joshua.corpus.Vocabulary.registerLanguageModel;
+import static org.apache.joshua.corpus.Vocabulary.unregisterLanguageModels;
+import static org.junit.Assert.*;
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.lm.KenLM;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Tests for the KenLM JNI binding.
+ * They load libken.{so,dylib}.
+ * When run in Eclipse, add -Djava.library.path=build/lib to the JVM arguments
+ * of the run configuration.
+ */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
+public class KenLmTest {
+
+  private static final String LANGUAGE_MODEL_PATH = "resources/kenlm/oilers.kenlm";
+
+  @Before
+  public void setUp() throws Exception {
+    // Reset the static Vocabulary state and its registered language models before each test.
+    Vocabulary.clear();
+    unregisterLanguageModels();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    Vocabulary.clear();
+    unregisterLanguageModels();
+  }
+
+  @Test
+  public void givenKenLm_whenQueryingForNgramProbability_thenProbIsCorrect() {
+    // GIVEN
+    final KenLM languageModel = new KenLM(3, LANGUAGE_MODEL_PATH);
+    final int[] wordIds = Vocabulary.addAll("Wayne Gretzky");
+    registerLanguageModel(languageModel);
+
+    // WHEN
+    final float actual = languageModel.prob(wordIds);
+
+    // THEN (NOTE(review): Float.MIN_VALUE is the smallest positive float, so this delta demands a practically exact match - confirm)
+    assertEquals("Found the wrong probability for 2-gram \"Wayne Gretzky\"", -0.99f, actual,
+        Float.MIN_VALUE);
+  }
+
+  @Test
+  public void givenKenLm_whenQueryingForNgramProbability_thenIdAndStringMethodsReturnTheSame() {
+    // GIVEN the same sentence as string tokens and as vocabulary ids
+    final KenLM languageModel = new KenLM(LANGUAGE_MODEL_PATH);
+    registerLanguageModel(languageModel);
+    final String sentence = "Wayne Gretzky";
+    final String[] tokens = sentence.split("\\s+");
+    final int[] tokenIds = Vocabulary.addAll(sentence);
+
+    // WHEN
+    final float probFromTokens = languageModel.prob(tokens);
+    final float probFromIds = languageModel.prob(tokenIds);
+
+    // THEN
+    assertEquals("ngram probabilities differ for word and id based n-gram query", probFromTokens,
+        probFromIds, Float.MIN_VALUE);
+  }
+
+  @Test
+  public void givenKenLm_whenIsKnownWord_thenReturnValuesAreCorrect() {
+    final KenLM languageModel = new KenLM(LANGUAGE_MODEL_PATH);
+    assertTrue(languageModel.isKnownWord("Wayne"));
+    assertFalse(languageModel.isKnownWord("Wayne2222"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java b/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
new file mode 100644
index 0000000..3901f40
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.system;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.Translations;
+import org.apache.joshua.decoder.io.TranslationRequestStream;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test for the multithreaded Joshua decoder. The grammar used is a
+ * toy packed grammar.
+ *
+ * The decoder is configured with 500 parallel decoders and is fed 10,000
+ * copies of the same sentence; the test passes when exactly that many
+ * translations come back.
+ *
+ * @author kellens
+ */
+public class MultithreadedTranslationTests {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  // Sentence that is submitted for every request.
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+  // Value of Decoder.VERBOSE before setUp() changed it; restored in tearDown().
+  private int previousLogLevel;
+  private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000;
+
+  /**
+   * Builds a decoder over the packed toy grammar and the glue grammar, then
+   * sets Decoder.VERBOSE to 0 after saving its previous value.
+   */
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    // Grammars: the packed toy grammar plus the glue grammar.
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar.packed");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("OOVPenalty");
+    // Feature weights.
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 2");
+    // This will enable 500 parallel decoders to run at once. Useful to help
+    // flush out concurrency errors in underlying data-structures.
+    joshuaConfig.num_parallel_decoders = 500;
+    // Second argument (configFile) is not even used by the
+    // constructor/initialize.
+    this.decoder = new Decoder(joshuaConfig, "");
+
+    previousLogLevel = Decoder.VERBOSE;
+    Decoder.VERBOSE = 0;
+  }
+
+  /** Releases the decoder and restores the Decoder.VERBOSE value saved by setUp(). */
+  @After
+  public void tearDown() throws Exception {
+    this.decoder.cleanUp();
+    this.decoder = null;
+    Decoder.VERBOSE = previousLogLevel;
+  }
+
+  /**
+   * This test was created specifically to reproduce a multithreaded issue
+   * related to mapped byte array access in the PackedGrammar getAlignmentArray
+   * function.
+   *
+   * We'll test the decoding engine using N = 10,000 identical inputs. This
+   * should be sufficient to induce concurrent data access for many shared
+   * data structures.
+   *
+   * The time spent draining the results is printed to stderr for information
+   * only; it is not asserted on.
+   */
+  @Test
+  public void givenPackedGrammar_whenNTranslationsCalledConcurrently_thenReturnNResults() throws IOException {
+    // GIVEN
+    final int inputLines = 10000;
+    joshuaConfig.use_structured_output = true; // Enabled alignments.
+
+    // Join N copies of the input, one per line, to simulate N requests to the
+    // decoding engine.
+    StringBuilder requests = new StringBuilder();
+    for (int i = 0; i < inputLines; i++) {
+      requests.append(INPUT).append('\n');
+    }
+    TranslationRequestStream req = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(new ByteArrayInputStream(requests.toString()
+        .getBytes(Charset.forName("UTF-8"))))), joshuaConfig);
+
+    // WHEN
+    // Translate all spans in parallel.
+    Translations translations = this.decoder.decodeAll(req);
+
+    // Collect every translation and time how long draining the results takes.
+    ArrayList<Translation> translationResults = new ArrayList<>();
+    final long translationStartTime = System.nanoTime();
+    for (Translation t : translations) {
+      translationResults.add(t);
+    }
+    final long translationEndTime = System.nanoTime();
+    final double translationDurationInSeconds =
+        (translationEndTime - translationStartTime) / ((double) NANO_SECONDS_PER_SECOND);
+    System.err.println(String.format("%.2f seconds", translationDurationInSeconds));
+
+    // THEN
+    // Report both counts on failure instead of a bare AssertionError.
+    assertTrue("Expected " + inputLines + " translations but got " + translationResults.size(),
+        translationResults.size() == inputLines);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java b/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
new file mode 100644
index 0000000..f5e9d34
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.system;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.Assert;
+
+/**
+ * End-to-end test of the Joshua decoder on a toy grammar that turns a handful
+ * of capital letters into lowercase letters. The grammar's rules drop words,
+ * generate extra words and simulate reordering, so that the extraction of word
+ * alignments can be verified.
+ *
+ * @author fhieber
+ */
+public class StructuredOutputTest {
+
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+  private static final String EXPECTED_TRANSLATION = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
+  private static final String EXPECTED_WORD_ALIGNMENT_STRING = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
+  // Entry i holds the first indices of the "a-b" pairs above whose second index is i.
+  private static final List<List<Integer>> EXPECTED_WORD_ALIGNMENT = Arrays.asList(
+      Arrays.asList(0), Arrays.asList(2, 6), Arrays.asList(), Arrays.asList(3),
+      Arrays.asList(4, 5), Arrays.asList(7), Arrays.asList(1),
+      Arrays.asList(1), Arrays.asList(1), Arrays.asList(), Arrays.asList(),
+      Arrays.asList(), Arrays.asList(7));
+  private static final double EXPECTED_SCORE = -17.0;
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private Translation translation = null;
+
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("OOVPenalty");
+    final String[] weights = {
+        "tm_pt_0 1", "tm_pt_1 1", "tm_pt_2 1", "tm_pt_3 1", "tm_pt_4 1", "tm_pt_5 1",
+        "tm_glue_0 1", "OOVPenalty 2"};
+    for (String weight : weights) {
+      joshuaConfig.weights.add(weight);
+    }
+    // The second argument (configFile) is not even used by the
+    // constructor/initialize.
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+    translation = null;
+  }
+
+  // Wraps the text in a Sentence built with the current configuration and decodes it.
+  private Translation decode(String text) {
+    return decoder.decode(new Sentence(text, 0, joshuaConfig));
+  }
+
+  @Test
+  public void test() {
+    // standard output: the format string yields "<translation> | <alignment>"
+    joshuaConfig.use_structured_output = false;
+    joshuaConfig.outputFormat = "%s | %a ";
+    translation = decode(INPUT);
+    final String expectedLine = EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING;
+    Assert.assertEquals(expectedLine, translation.toString().trim());
+
+    // structured output: the same information read through the getters
+    joshuaConfig.use_structured_output = true;
+    translation = decode(INPUT);
+    Assert.assertEquals(EXPECTED_TRANSLATION, translation.getStructuredTranslations().get(0).getTranslationString());
+    Assert.assertEquals(Arrays.asList(EXPECTED_TRANSLATION.split("\\s+")),
+        translation.getStructuredTranslations().get(0).getTranslationTokens());
+    Assert.assertEquals(EXPECTED_SCORE,
+        translation.getStructuredTranslations().get(0).getTranslationScore(), 0.00001);
+    Assert.assertEquals(EXPECTED_WORD_ALIGNMENT, translation.getStructuredTranslations().get(0).getTranslationWordAlignments());
+    // there must be exactly one alignment entry per output token
+    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size(),
+        translation.getStructuredTranslations().get(0).getTranslationTokens().size());
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java b/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
new file mode 100644
index 0000000..6718858
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.system;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.StructuredTranslation;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Integration test for the complete Joshua decoder using a toy grammar that translates
+ * a bunch of capital letters to lowercase letters. Rules in the test grammar
+ * drop and generate additional words and simulate reordering of rules, so that
+ * proper extraction of word alignments and other information from the decoder
+ * can be tested.
+ *
+ * @author fhieber
+ */
+public class StructuredTranslationTest {
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
+  private static final String EXPECTED_TRANSLATION = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
+  private static final List<String> EXPECTED_TRANSLATED_TOKENS = asList(EXPECTED_TRANSLATION.split("\\s+"));
+  private static final String EXPECTED_WORD_ALIGNMENT_STRING = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
+  private static final List<List<Integer>> EXPECTED_WORD_ALIGNMENT = asList(
+      asList(0), asList(2, 6), asList(), asList(3),
+      asList(4, 5), asList(7), asList(1),
+      asList(1), asList(1), asList(), asList(),
+      asList(), asList(7));
+  private static final double EXPECTED_SCORE = -17.0;
+  private static final Map<String,Float> EXPECTED_FEATURES = new HashMap<>();
+  private static final int EXPECTED_NBEST_LIST_SIZE = 8;
+  static {
+    EXPECTED_FEATURES.put("tm_glue_0", 1.0f);
+    EXPECTED_FEATURES.put("tm_pt_0", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_1", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_2", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_3", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_4", -3.0f);
+    EXPECTED_FEATURES.put("tm_pt_5", -3.0f);
+    EXPECTED_FEATURES.put("OOV", 7.0f);
+    EXPECTED_FEATURES.put("OOVPenalty", 0.0f);
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.search_algorithm = "cky";
+    joshuaConfig.mark_oovs = false;
+    joshuaConfig.pop_limit = 100;
+    joshuaConfig.use_unique_nbest = false;
+    joshuaConfig.include_align_index = false;
+    joshuaConfig.topN = 0;
+    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
+    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
+    joshuaConfig.goal_symbol = "[GOAL]";
+    joshuaConfig.default_non_terminal = "[X]";
+    joshuaConfig.features.add("OOVPenalty");
+    joshuaConfig.weights.add("tm_pt_0 1");
+    joshuaConfig.weights.add("tm_pt_1 1");
+    joshuaConfig.weights.add("tm_pt_2 1");
+    joshuaConfig.weights.add("tm_pt_3 1");
+    joshuaConfig.weights.add("tm_pt_4 1");
+    joshuaConfig.weights.add("tm_pt_5 1");
+    joshuaConfig.weights.add("tm_glue_0 1");
+    joshuaConfig.weights.add("OOVPenalty 1");
+    decoder = new Decoder(joshuaConfig, ""); // the second argument (configFile)
+                                             // is not even used by the
+                                             // constructor/initialize
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  private Translation decode(String input) {
+    Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+  @Test
+  public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
+    // GIVEN plain string output formatted as "<translation> | <alignment>"
+    joshuaConfig.use_structured_output = false;
+    joshuaConfig.outputFormat = "%s | %a ";
+
+    // WHEN
+    final String translation = decode(INPUT).toString().trim();
+
+    // THEN
+    assertEquals(EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING, translation);
+  }
+
+  @Test
+  public void givenInput_whenRegularOutputFormatWithTopN1_thenExpectedOutput() {
+    // GIVEN plain string output with translation, input, alignment and score, and topN = 1
+    joshuaConfig.use_structured_output = false;
+    joshuaConfig.outputFormat = "%s | %e | %a | %c";
+    joshuaConfig.topN = 1;
+
+    // WHEN
+    final String translation = decode(INPUT).toString().trim();
+
+    // THEN
+    assertEquals(EXPECTED_TRANSLATION + " | " + INPUT + " | " + EXPECTED_WORD_ALIGNMENT_STRING + String.format(" | %.3f", EXPECTED_SCORE),
+        translation);
+  }
+
+  @Test
+  public void givenInput_whenStructuredOutputFormatWithTopN0_thenExpectedOutput() {
+    // GIVEN structured output with topN = 0
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 0;
+
+    // WHEN
+    final Translation translation = decode(INPUT);
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
+
+    // THEN exactly one structured translation is returned and it matches the expectations
+    assertEquals(1, translation.getStructuredTranslations().size());
+    assertEquals(EXPECTED_TRANSLATION, translationString);
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
+    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
+    assertEquals(wordAlignment.size(), translatedTokens.size());
+    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
+  }
+
+  @Test
+  public void givenInput_whenStructuredOutputFormatWithTopN1_thenExpectedOutput() {
+    // GIVEN structured output with topN = 1
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 1;
+
+    // WHEN
+    final Translation translation = decode(INPUT);
+    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
+    final StructuredTranslation structuredTranslation = structuredTranslations.get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+    final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
+
+    // THEN exactly one structured translation is returned and it matches the expectations
+    assertEquals(1, structuredTranslations.size());
+    assertEquals(EXPECTED_TRANSLATION, translationString);
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, translatedTokens);
+    assertEquals(EXPECTED_SCORE, translationScore, 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, wordAlignment);
+    assertEquals(wordAlignment.size(), translatedTokens.size());
+    assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
+  }
+
+  @Test
+  public void givenInput_whenStructuredOutputFormatWithKBest_thenExpectedOutput() {
+    // GIVEN structured output with topN = 100
+    joshuaConfig.use_structured_output = true;
+    joshuaConfig.topN = 100;
+
+    // WHEN
+    final Translation translation = decode(INPUT);
+    final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
+    final StructuredTranslation viterbiTranslation = structuredTranslations.get(0);
+    final StructuredTranslation lastKBest = structuredTranslations.get(structuredTranslations.size() - 1);
+
+    // THEN (expected value first, so a failure message labels the two sizes correctly)
+    assertEquals(EXPECTED_NBEST_LIST_SIZE, structuredTranslations.size());
+    assertTrue(structuredTranslations.size() > 1);
+    assertEquals(EXPECTED_TRANSLATION, viterbiTranslation.getTranslationString());
+    assertEquals(EXPECTED_TRANSLATED_TOKENS, viterbiTranslation.getTranslationTokens());
+    assertEquals(EXPECTED_SCORE, viterbiTranslation.getTranslationScore(), 0.00001);
+    assertEquals(EXPECTED_WORD_ALIGNMENT, viterbiTranslation.getTranslationWordAlignments());
+    assertEquals(EXPECTED_FEATURES.entrySet(), viterbiTranslation.getTranslationFeatures().entrySet());
+    // last entry in KBEST is all input words untranslated, should have 8 OOVs.
+    assertEquals(INPUT, lastKBest.getTranslationString());
+    assertEquals(-800.0, lastKBest.getTranslationFeatures().get("OOVPenalty"), 0.0001);
+
+  }
+
+  @Test
+  public void givenEmptyInput_whenStructuredOutputFormat_thenEmptyOutput() {
+    // GIVEN
+    joshuaConfig.use_structured_output = true;
+
+    // WHEN an empty string is decoded
+    final Translation translation = decode("");
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+
+    // THEN every part of the structured translation is empty and the score is 0
+    assertEquals("", translationString);
+    assertTrue(translatedTokens.isEmpty());
+    assertEquals(0, translationScore, 0.00001);
+    assertTrue(wordAlignment.isEmpty());
+  }
+
+  @Test
+  public void givenOOVInput_whenStructuredOutputFormat_thenOOVOutput() {
+    // GIVEN
+    joshuaConfig.use_structured_output = true;
+    final String input = "gabarbl";
+
+    // WHEN
+    final Translation translation = decode(input);
+    final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
+    final String translationString = structuredTranslation.getTranslationString();
+    final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
+    final float translationScore = structuredTranslation.getTranslationScore();
+    final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
+
+    // THEN the input word is passed through unchanged and aligned to position 0
+    assertEquals(input, translationString);
+    assertTrue(translatedTokens.contains(input));
+    assertEquals(-99.0, translationScore, 0.00001);
+    assertTrue(wordAlignment.contains(asList(0)));
+  }
+
+  @Test
+  public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
+    // GIVEN
+    joshuaConfig.use_structured_output = false;
+    joshuaConfig.outputFormat = "%s";
+
+    // WHEN
+    final Translation translation = decode("");
+    final String translationString = translation.toString();
+
+    // THEN
+    assertEquals("\n", translationString);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/ui/tree_visualizer/tree/TreeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/ui/tree_visualizer/tree/TreeTest.java b/joshua-core/src/test/java/org/apache/joshua/ui/tree_visualizer/tree/TreeTest.java
new file mode 100644
index 0000000..55e8f56
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/ui/tree_visualizer/tree/TreeTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.ui.tree_visualizer.tree;
+
+import java.util.List;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for {@link Tree}: construction from a bracketed string, the node
+ * accessors, {@code toString()} and {@link Tree.NodeSourceStartComparator}.
+ */
+public class TreeTest {
+
+  /** Root A annotated with source span 0-1 and a single leaf child "foo". */
+  private static final String SIMPLE_TREE = "(A{0-1} foo)";
+
+  /** Root A (span 0-2) with a leaf "foo" and an internal child B (span 1-2). */
+  private static final String NESTED_TREE = "(A{0-2} foo (B{1-2} bar))";
+
+  /** An empty string must be rejected by the constructor. */
+  @Test(expectedExceptions = { IllegalArgumentException.class })
+  public void ctor_EmptyString_IllegalArgument() {
+    Tree parsed = new Tree("");
+    // Only reached if the constructor accepted the input, in which case the
+    // expected IllegalArgumentException was not thrown and the test fails.
+    Assert.assertEquals(parsed.size(), 0);
+  }
+
+  /** A string with an unclosed parenthesis must be rejected by the constructor. */
+  @Test(expectedExceptions = { IllegalArgumentException.class })
+  public void ctor_TooFewCloseParens_IllegalArgument() {
+    Tree parsed = new Tree("(A{0-1} foo");
+    // Only reached if the constructor accepted the input (see above).
+    Assert.assertEquals(parsed.size(), 0);
+  }
+
+  /** The simple tree is reported as having two nodes. */
+  @Test
+  public void simpleTree_correctSize() {
+    Assert.assertEquals(new Tree(SIMPLE_TREE).size(), 2);
+  }
+
+  /** The root carries the label, the source span and exactly one child. */
+  @Test
+  public void simpleTree_correctRoot() {
+    Tree.Node top = new Tree(SIMPLE_TREE).root();
+    Assert.assertEquals(top.label(), "A");
+    Assert.assertEquals(top.sourceStartIndex(), 0);
+    Assert.assertEquals(top.sourceEndIndex(), 1);
+    Assert.assertEquals(top.children().size(), 1);
+  }
+
+  /** The leaf has its label, no children, and -1 for both source indices. */
+  @Test
+  public void simpleTree_correctLeaf() {
+    Tree.Node onlyChild = new Tree(SIMPLE_TREE).root().children().get(0);
+    Assert.assertEquals(onlyChild.label(), "foo");
+    Assert.assertEquals(onlyChild.sourceStartIndex(), -1);
+    Assert.assertEquals(onlyChild.sourceEndIndex(), -1);
+    Assert.assertEquals(onlyChild.children().size(), 0);
+  }
+
+  /** {@code toString()} reproduces the string the tree was built from. */
+  @Test
+  public void simpleTree_toString() {
+    Assert.assertEquals(new Tree(SIMPLE_TREE).toString(), "(A{0-1} foo)");
+  }
+
+  /** A root with a leaf child followed by an internal child keeps both, in order. */
+  @Test
+  public void trickyTree_children() {
+    List<Tree.Node> kids = new Tree(NESTED_TREE).root().children();
+    Assert.assertEquals(kids.size(), 2);
+
+    Tree.Node leafChild = kids.get(0);
+    Assert.assertEquals(leafChild.label(), "foo");
+    Assert.assertTrue(leafChild.isLeaf());
+    Assert.assertEquals(leafChild.sourceStartIndex(), -1);
+    Assert.assertEquals(leafChild.sourceEndIndex(), -1);
+
+    Tree.Node internalChild = kids.get(1);
+    Assert.assertEquals(internalChild.label(), "B");
+    Assert.assertEquals(internalChild.children().size(), 1);
+    Assert.assertFalse(internalChild.isLeaf());
+    Assert.assertEquals(internalChild.sourceStartIndex(), 1);
+    Assert.assertEquals(internalChild.sourceEndIndex(), 2);
+  }
+
+  /** The node starting at source index 0 orders before the node starting at 1. */
+  @Test
+  public void SourceStartComparator() {
+    Tree.Node outer = new Tree(NESTED_TREE).root();
+    Tree.Node inner = outer.children().get(1);
+    Tree.NodeSourceStartComparator bySourceStart = new Tree.NodeSourceStartComparator();
+    Assert.assertTrue(bySourceStart.compare(outer, inner) < 0);
+  }
+
+  /** Every leaf orders before every internal node. */
+  @Test
+  public void SourceStartComparator_LeafSmallerThanAllInternals() {
+    Tree.Node outer = new Tree(NESTED_TREE).root();
+    Tree.Node outerLeaf = outer.children().get(0);
+    Tree.Node inner = outer.children().get(1);
+    Tree.Node innerLeaf = inner.children().get(0);
+    Tree.NodeSourceStartComparator bySourceStart = new Tree.NodeSourceStartComparator();
+    Assert.assertTrue(bySourceStart.compare(outerLeaf, outer) < 0);
+    Assert.assertTrue(bySourceStart.compare(outerLeaf, inner) < 0);
+    Assert.assertTrue(bySourceStart.compare(innerLeaf, outer) < 0);
+    Assert.assertTrue(bySourceStart.compare(innerLeaf, inner) < 0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/util/BitsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/util/BitsTest.java b/joshua-core/src/test/java/org/apache/joshua/util/BitsTest.java
new file mode 100644
index 0000000..50704dc
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/util/BitsTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for doing bit twiddling.
+ * <p>
+ * Every test packs a (high, low) pair with {@code Bits.encodeAsLong} or
+ * {@code Bits.encodeAsInt} and checks that {@code Bits.decodeHighBits} and
+ * {@code Bits.decodeLowBits} give back the original pair. One half of the pair
+ * is swept over the non-negative or the non-positive range in fixed steps while
+ * the other half is taken from a small set of boundary values.
+ *
+ * @author Lane Schwartz
+ */
+public class BitsTest {
+
+	/** Boundary values paired with each swept value in the long-encoding tests. */
+	private static final int[] INT_SAMPLES =
+			{Integer.MIN_VALUE, -1234567890, -1, 0, 1, 1234567890, Integer.MAX_VALUE};
+
+	/** Boundary values paired with each swept value in the int-encoding tests. */
+	private static final short[] SHORT_SAMPLES =
+			{Short.MIN_VALUE, -12345, -1, 0, 1, 12345, Short.MAX_VALUE};
+
+	/** Distance between consecutive swept int values. */
+	private static final int INT_STEP = Integer.MAX_VALUE / 754;
+
+	/** Distance between consecutive swept short values. */
+	private static final short SHORT_STEP = Short.MAX_VALUE / 75;
+
+	/** Encodes two ints into a long and checks that both halves decode unchanged. */
+	private static void assertLongRoundTrip(int high, int low) {
+		long encoded = Bits.encodeAsLong(high, low);
+
+		Assert.assertEquals(Bits.decodeHighBits(encoded), high);
+		Assert.assertEquals(Bits.decodeLowBits(encoded), low);
+	}
+
+	/** Encodes two shorts into an int and checks that both halves decode unchanged. */
+	private static void assertIntRoundTrip(short high, short low) {
+		int encoded = Bits.encodeAsInt(high, low);
+
+		Assert.assertEquals(Bits.decodeHighBits(encoded), high);
+		Assert.assertEquals(Bits.decodeLowBits(encoded), low);
+	}
+
+	@Test
+	public void positiveLowBitsLongEncoding() {
+
+		for (int high : INT_SAMPLES) {
+			// Stepping past Integer.MAX_VALUE wraps to a negative value, which ends the sweep.
+			for (int low = 0; low >= 0; low += INT_STEP) {
+				assertLongRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void negativeLowBitsLongEncoding() {
+
+		for (int high : INT_SAMPLES) {
+			// Sweep upward from Integer.MIN_VALUE. The previous loop subtracted the step
+			// from Integer.MIN_VALUE, wrapped positive at once, and so ran only one pass.
+			for (int low = Integer.MIN_VALUE; low <= 0; low += INT_STEP) {
+				assertLongRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void positiveHighBitsLongEncoding() {
+
+		for (int low : INT_SAMPLES) {
+			// Stepping past Integer.MAX_VALUE wraps to a negative value, which ends the sweep.
+			for (int high = 0; high >= 0; high += INT_STEP) {
+				assertLongRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void negativeHighBitsLongEncoding() {
+
+		for (int low : INT_SAMPLES) {
+			// Sweep upward from Integer.MIN_VALUE; see negativeLowBitsLongEncoding.
+			for (int high = Integer.MIN_VALUE; high <= 0; high += INT_STEP) {
+				assertLongRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void positiveLowBitsIntEncoding() {
+
+		for (short high : SHORT_SAMPLES) {
+			// The compound assignment narrows back to short, so stepping past
+			// Short.MAX_VALUE wraps to a negative value, which ends the sweep.
+			for (short low = 0; low >= 0; low += SHORT_STEP) {
+				assertIntRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void negativeLowBitsIntEncoding() {
+
+		for (short high : SHORT_SAMPLES) {
+			// Sweep upward from Short.MIN_VALUE. The previous loop started at 0 with the
+			// condition low>=0 while decrementing, so only low == 0 was ever tested.
+			for (short low = Short.MIN_VALUE; low <= 0; low += SHORT_STEP) {
+				assertIntRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void positiveHighBitsIntEncoding() {
+
+		for (short low : SHORT_SAMPLES) {
+			// Stepping past Short.MAX_VALUE wraps to a negative value, which ends the sweep.
+			for (short high = 0; high >= 0; high += SHORT_STEP) {
+				assertIntRoundTrip(high, low);
+			}
+		}
+	}
+
+	@Test
+	public void negativeHighBitsIntEncoding() {
+
+		for (short low : SHORT_SAMPLES) {
+			// Sweep upward from Short.MIN_VALUE; see negativeLowBitsIntEncoding.
+			for (short high = Short.MIN_VALUE; high <= 0; high += SHORT_STEP) {
+				assertIntRoundTrip(high, low);
+			}
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/util/CacheTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/util/CacheTest.java b/joshua-core/src/test/java/org/apache/joshua/util/CacheTest.java
new file mode 100644
index 0000000..53b8eb2
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/util/CacheTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit test for {@link Cache}.
+ */
+public class CacheTest {
+
+  /**
+   * Fills a cache constructed with the argument 5 with five entries, reads the
+   * first one, adds a sixth entry and checks that the entry that was read is
+   * still present.
+   */
+  @Test
+  public void test() {
+
+    final String[] initialKeys = { "a", "b", "c", "d", "e" };
+
+    Cache<String,Integer> cache = new Cache<String,Integer>(5);
+
+    // Keys "a".."e" are stored with the values 1..5
+    for (int i = 0; i < initialKeys.length; i++) {
+      cache.put(initialKeys[i], i + 1);
+    }
+
+    for (String key : initialKeys) {
+      Assert.assertTrue(cache.containsKey(key));
+    }
+
+    // Touch "a" before overflowing the cache
+    cache.get("a");
+
+    // A sixth entry exceeds the capacity of the cache
+    cache.put("f", 6);
+
+    Assert.assertTrue(cache.containsKey("a"));
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/util/CountsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/util/CountsTest.java b/joshua-core/src/test/java/org/apache/joshua/util/CountsTest.java
new file mode 100644
index 0000000..e6a20a4
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/util/CountsTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Counts class.
+ *
+ * @author Lane Schwartz
+ */
+public class CountsTest {
+
+	/**
+	 * Increments a {@code Counts<Integer,Integer>} in a fixed pattern and then
+	 * checks the co-occurrence counts, the per-b totals and the probabilities
+	 * it reports against values computed from that pattern.
+	 */
+	@Test
+	public void verifyCounts() {
+
+		final int maxA = 100;
+		final int maxB = 100;
+
+		final Counts<Integer,Integer> counts = new Counts<Integer,Integer>();
+
+		// Increment counts: (a,b) and (null,b) are each incremented b%10+1 times
+		// for every a, and (a,null) is incremented 10-a%10 times.
+		for (int a = 0; a < maxA; a++) {
+			for (int b = 0; b < maxB; b++) {
+				final int repetitions = b % 10 + 1;
+				for (int n = 0; n < repetitions; n++) {
+					counts.incrementCount(a, b);
+					counts.incrementCount(null, b);
+				}
+			}
+
+			final int nullRepetitions = 10 - a % 10;
+			for (int n = 0; n < nullRepetitions; n++) {
+				counts.incrementCount(a, null);
+			}
+		}
+
+		// Verify co-occurrence counts
+		for (int a = 0; a < maxA; a++) {
+			for (int b = 0; b < maxB; b++) {
+				final int pairCount = b % 10 + 1;
+				Assert.assertEquals(counts.getCount(a, b), pairCount);
+				Assert.assertEquals(counts.getCount(null, b), maxA * pairCount);
+			}
+
+			Assert.assertEquals(counts.getCount(a, null), 10 - a % 10);
+		}
+
+		// Verify totals for B counts
+		for (int b = 0; b < maxB; b++) {
+			Assert.assertEquals(counts.getCount(b), maxA * 2 * (b % 10 + 1));
+		}
+
+		// Number of increments in which a given a was paired with a non-null b;
+		// it is the same for every a.
+		int incrementsWithB = 0;
+		for (int b = 0; b < maxB; b++) {
+			incrementsWithB += b % 10 + 1;
+		}
+
+		// Verify probabilities
+		final float pairProbability = 1.0f / (maxA * 2);
+		for (int a = 0; a < maxA; a++) {
+			for (int b = 0; b < maxB; b++) {
+				Assert.assertEquals(counts.getProbability(a, b), pairProbability);
+				Assert.assertEquals(counts.getProbability(null, b), 0.5f);
+			}
+
+			final int incrementsWithNull = 10 - a % 10;
+			final int incrementsForA = incrementsWithB + incrementsWithNull;
+
+			final float nullExpected = (float) (incrementsWithNull) / (float) (incrementsForA);
+			Assert.assertEquals(counts.getReverseProbability(null, a), nullExpected);
+		}
+
+	}
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/util/FormatUtilsTest.java b/joshua-core/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
new file mode 100644
index 0000000..84b418b
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/util/FormatUtilsTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.util;
+
+import static org.apache.joshua.util.FormatUtils.cleanNonTerminal;
+import static org.apache.joshua.util.FormatUtils.escapeSpecialSymbols;
+import static org.apache.joshua.util.FormatUtils.isNonterminal;
+import static org.apache.joshua.util.FormatUtils.ensureNonTerminalBrackets;
+import static org.apache.joshua.util.FormatUtils.stripNonTerminalIndex;
+import static org.apache.joshua.util.FormatUtils.unescapeSpecialSymbols;
+import static org.junit.Assert.*;
+
+import org.junit.Test;
+
+/**
+ * Unit tests for the static helpers of {@link FormatUtils}.
+ * <p>
+ * All {@code assertEquals} calls follow JUnit's (expected, actual) argument
+ * order so that a failing assertion reports the two values the right way round.
+ */
+public class FormatUtilsTest {
+
+  /** Bracketed, non-empty symbols are non-terminals; "[]" and "[X)" are not. */
+  @Test
+  public void givenTokens_whenIsNonTerminal_thenTokensCorrectlyClassified() {
+    assertTrue(isNonterminal("[X]"));
+    assertTrue(isNonterminal("[X,1]"));
+    assertFalse(isNonterminal("[]"));
+    assertFalse(isNonterminal("[X)"));
+  }
+
+  /** Brackets and index are removed from non-terminals; other tokens are untouched. */
+  @Test
+  public void givenTokens_whenCleanNonTerminal_thenCorrectlyCleaned() {
+    assertEquals("GOAL", cleanNonTerminal("[GOAL]"));
+    assertEquals("X", cleanNonTerminal("[X]"));
+    assertEquals("X", cleanNonTerminal("[X,1]"));
+    assertEquals("bla", cleanNonTerminal("bla"));
+    assertEquals("[bla", cleanNonTerminal("[bla"));
+  }
+
+  /** The index is dropped and the symbol is returned in brackets. */
+  @Test
+  public void givenTokens_whenStripNonTerminalIndex_thenCorrectlyStripped() {
+    assertEquals("[X]", stripNonTerminalIndex("[X,1]"));
+    assertEquals("[X]", stripNonTerminalIndex("[X,114]"));
+    assertEquals("[X]", stripNonTerminalIndex("[X,]"));
+    assertEquals("[X]", stripNonTerminalIndex("[X]"));
+    // A token that is not a well-formed non-terminal is wrapped as it stands.
+    assertEquals("[[X]", stripNonTerminalIndex("[X"));
+  }
+
+  /** A bare symbol is wrapped in brackets. */
+  @Test
+  public void givenTokens_whenMarkup_thenCorrectMarkup() {
+    assertEquals("[X]", ensureNonTerminalBrackets("X"));
+  }
+
+  /** "[", "]" and "|" are replaced by -lsb-, -rsb- and -pipe-. */
+  @Test
+  public void givenSpecialSymbols_whenEscapeSpecialSymbols_thenCorrectlyEscaped() {
+    assertEquals("-lsb- -rsb- -pipe- -lsb-", escapeSpecialSymbols("[ ] | ["));
+  }
+
+  /** -lsb-, -rsb- and -pipe- are turned back into "[", "]" and "|". */
+  @Test
+  public void givenEscapedSpecialSymbols_whenUnEscapeSpecialSymbols_thenCorrectlyUnEscaped() {
+    assertEquals("[ ] | [", unescapeSpecialSymbols("-lsb- -rsb- -pipe- -lsb-"));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/util/io/BinaryTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/util/io/BinaryTest.java b/joshua-core/src/test/java/org/apache/joshua/util/io/BinaryTest.java
new file mode 100644
index 0000000..6739b8b
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/util/io/BinaryTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.util.io;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit test that writes a {@link Vocabulary} to a temporary file and reads it
+ * back through {@link BinaryIn}.
+ */
+public class BinaryTest {
+
+  /**
+   * Builds a vocabulary from all one- and two-letter lowercase strings, writes
+   * it to a temporary file, reads it back and checks that the result equals
+   * the original.
+   *
+   * @throws IOException if the temporary file cannot be created, written, read or closed
+   * @throws ClassNotFoundException if reading the object back fails to resolve its class
+   */
+  @Test
+  public void externalizeVocabulary() throws IOException, ClassNotFoundException {
+
+    Set<String> words = new HashSet<String>();
+
+    for (char c1='a'; c1<='z'; c1++) {
+      words.add(new String(new char[]{c1}));
+      for (char c2='a'; c2<='z'; c2++) {
+        words.add(new String(new char[]{c1,c2}));
+      }
+    }
+
+    Vocabulary vocab = new Vocabulary();
+    vocab.addAll(words.toArray(new String[words.size()]));
+
+    try {
+
+      File tempFile = File.createTempFile(BinaryTest.class.getName(), "vocab");
+      // Do not leave the temp file behind after the test run.
+      tempFile.deleteOnExit();
+
+      FileOutputStream outputStream = new FileOutputStream(tempFile);
+      try {
+        // Nothing is written through this BinaryOut; it is constructed as before,
+        // and the vocabulary is written to the file by path.
+        @SuppressWarnings({ "unused", "resource" })
+        ObjectOutput out = new BinaryOut(outputStream, true);
+        vocab.write(tempFile.toString());
+      } finally {
+        // Release the file handle; closing the raw stream writes no bytes.
+        outputStream.close();
+      }
+
+      ObjectInput in = new BinaryIn(tempFile.getAbsolutePath(), Vocabulary.class);
+      try {
+        Object o = in.readObject();
+        Assert.assertTrue(o instanceof Vocabulary);
+
+        Vocabulary newVocab = (Vocabulary) o;
+
+        Assert.assertNotNull(newVocab);
+        Assert.assertEquals(newVocab.size(), vocab.size());
+
+        Assert.assertTrue(newVocab.equals(vocab));
+      } finally {
+        in.close();
+      }
+
+    } catch (SecurityException e) {
+      Assert.fail("Operating system is unable to create a temp file required by this unit test: " + e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/zmert/BLEUTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/zmert/BLEUTest.java b/joshua-core/src/test/java/org/apache/joshua/zmert/BLEUTest.java
new file mode 100644
index 0000000..9423d88
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/zmert/BLEUTest.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.zmert;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.Scanner;
+
+import org.apache.joshua.metrics.BLEU;
+import org.apache.joshua.metrics.EvaluationMetric;
+import org.testng.Assert;
+import org.testng.annotations.Parameters;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for BLEU class.
+ * 
+ * @author Lane Schwartz
+ * @version $LastChangedDate$
+ */
+public class BLEUTest {
+
+  /** The metric constructed with the default constructor reports the name "BLEU". */
+  @Test
+  public void metricName() {
+
+    // Setup the EvaluationMetric class
+    EvaluationMetric.set_numSentences(0);
+    EvaluationMetric.set_refsPerSen(1);
+    EvaluationMetric.set_refSentences(null);
+
+    BLEU bleu = new BLEU();
+
+    Assert.assertEquals(bleu.get_metricName(), "BLEU");
+
+  }
+
+  /** The default constructor selects 4-grams and the closest-reference length method. */
+  @Test
+  public void defaultConstructor() {
+
+    // Setup the EvaluationMetric class
+    EvaluationMetric.set_numSentences(0);
+    EvaluationMetric.set_refsPerSen(1);
+    EvaluationMetric.set_refSentences(null);
+
+    BLEU bleu = new BLEU();
+
+    // Default constructor should use a maximum n-gram length of 4
+    Assert.assertEquals(bleu.getMaxGramLength(), 4);
+
+    // Default constructor should use the closest reference
+    Assert.assertEquals(bleu.getEffLengthMethod(), BLEU.EffectiveLengthMethod.CLOSEST);
+
+  }
+
+  /**
+   * Scores one candidate against one reference and checks both the BLEU score
+   * and the ten sufficient statistics.
+   */
+  @Test
+  public void simpleTest() {
+
+    String ref = "this is the fourth chromosome whose sequence has been completed to date . it comprises more than 87 million pairs of dna .";
+    String test = "this is the fourth chromosome to be fully sequenced up till now and it comprises of over 87 million pairs of deoxyribonucleic acid ( dna ) .";
+
+    // refSentences[i][r] stores the r'th reference of the i'th sentence
+    String[][] refSentences = new String[1][1];
+    refSentences[0][0] = ref;
+
+    EvaluationMetric.set_numSentences(1);
+    EvaluationMetric.set_refsPerSen(1);
+    EvaluationMetric.set_refSentences(refSentences);
+
+    BLEU bleu = new BLEU();
+
+    // testSentences[i] stores the candidate translation for the i'th sentence
+    String[] testSentences = new String[1];
+    testSentences[0] = test;
+    try {
+      // Check BLEU score matches
+      double actualScore = bleu.score(testSentences);
+      double expectedScore = 0.2513;
+      double acceptableScoreDelta = 0.00001f;
+
+      Assert.assertEquals(actualScore, expectedScore, acceptableScoreDelta);
+
+      // Check sufficient statistics match
+      int[] actualSS = bleu.suffStats(testSentences);
+      int[] expectedSS = {14,27,8,26,5,25,3,24,27,23};
+
+      Assert.assertEquals(actualSS[0], expectedSS[0], 0); // 1-gram matches
+      Assert.assertEquals(actualSS[1], expectedSS[1], 0); // 1-gram total
+      Assert.assertEquals(actualSS[2], expectedSS[2], 0); // 2-gram matches
+      Assert.assertEquals(actualSS[3], expectedSS[3], 0); // 2-gram total
+      Assert.assertEquals(actualSS[4], expectedSS[4], 0); // 3-gram matches
+      Assert.assertEquals(actualSS[5], expectedSS[5], 0); // 3-gram total
+      Assert.assertEquals(actualSS[6], expectedSS[6], 0); // 4-gram matches
+      Assert.assertEquals(actualSS[7], expectedSS[7], 0); // 4-gram total
+      Assert.assertEquals(actualSS[8], expectedSS[8], 0); // candidate length
+      Assert.assertEquals(actualSS[9], expectedSS[9], 0); // reference length
+    } catch (Exception e) {
+      // Keep the original exception as the cause so the failure can be diagnosed.
+      // Assertion failures are Errors and are not caught here.
+      Assert.fail("Unexpected exception while computing BLEU: " + e, e);
+    }
+  }
+
+  /**
+   * Placeholder for a file-based test; currently disabled. It only reads the
+   * reference file line by line.
+   *
+   * @param referenceFile path of the reference file
+   * @param testFile path of the candidate file (not used yet)
+   * @throws FileNotFoundException if the reference file does not exist
+   */
+  @Parameters({"referenceFile","testFile"})
+  @Test(enabled=false)
+  public void fileTest(String referenceFile, String testFile) throws FileNotFoundException {
+
+    //TODO You can now read in the files, and do something useful with them.
+
+    Scanner refScanner = new Scanner(new File(referenceFile));
+    try {
+      while (refScanner.hasNextLine()) {
+
+        @SuppressWarnings("unused")
+        String refLine = refScanner.nextLine();
+      }
+    } finally {
+      // Release the underlying file handle even if reading fails.
+      refScanner.close();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/resources/bn-en/hiero/.gitignore
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/bn-en/hiero/.gitignore b/joshua-core/src/test/resources/bn-en/hiero/.gitignore
new file mode 100644
index 0000000..1710208
--- /dev/null
+++ b/joshua-core/src/test/resources/bn-en/hiero/.gitignore
@@ -0,0 +1,4 @@
+diff
+log
+output
+output.scores