You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/17 10:32:00 UTC

[19/56] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
new file mode 100644
index 0000000..7e4c2cc
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.zmert;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.FileUtility;
+
+/**
+ * Command-line launcher for Z-MERT.
+ *
+ * <p>Given one argument (a MERT config file) the optimization runs inside this JVM via
+ * {@code MertCore}. Given three arguments, every MERT iteration is started as a separate
+ * {@code java} process whose maximum heap is the requested number of megabytes.
+ */
+public class ZMERT {
+
+  /** Exit status of the child process after which no further iteration is launched. */
+  private static final int STATUS_DONE = 90;
+
+  /** Exit status of the child process after which the next iteration is launched. */
+  private static final int STATUS_CONTINUE = 91;
+
+  /**
+   * Parses the command line and runs MERT either in-process or through child processes.
+   *
+   * <p>The JVM exits with 2 after printing the detailed help ({@code -h}), with 1 when the
+   * number of arguments is neither 1 nor 3, with the child's status when a child returns
+   * anything other than 90 or 91, and with 0 otherwise.
+   *
+   * @param args either {@code -h}, a single config file name, or three arguments of which
+   *        the second is the memory limit in MB and the third the config file name
+   * @throws Exception propagated from {@code MertCore} or from launching, reading or
+   *         waiting for a child process
+   */
+  public static void main(String[] args) throws Exception {
+    boolean external = false; // should each MERT iteration be launched externally?
+
+    if (args.length == 1) {
+      if (args[0].equals("-h")) {
+        printZMERTUsage(args.length, true);
+        System.exit(2);
+      }
+    } else if (args.length == 3) {
+      external = true;
+    } else {
+      printZMERTUsage(args.length, false);
+      System.exit(1);
+    }
+
+    if (external) {
+      // args[0] (the "-maxMem" flag shown in the usage text) is not inspected.
+      runExternally(Integer.parseInt(args[1]), args[2]);
+    } else {
+      JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+      MertCore myMert = new MertCore(args[0], joshuaConfiguration);
+      myMert.run_MERT(); // optimize lambda[]!!!
+      myMert.finish();
+    }
+
+    System.exit(0);
+
+  } // main(String[] args)
+
+  /**
+   * Launches one {@code java} child process per MERT iteration until a child returns 90.
+   *
+   * <p>Everything the child writes (stdout and stderr) is echoed to {@code System.out}.
+   * A child status other than 90 or 91 terminates this JVM with that status.
+   *
+   * @param maxMem maximum heap of each child process, in MB
+   * @param configFileName MERT config file handed to each child
+   * @throws Exception if a child cannot be started, read from, or waited for
+   */
+  private static void runExternally(int maxMem, String configFileName) throws Exception {
+    String stateFileName = FileUtility.dirname(configFileName) + "/ZMERT.temp.state";
+    String cp = System.getProperty("java.class.path");
+
+    int iteration = 0;
+    boolean done = false;
+    while (!done) {
+      ++iteration;
+
+      // One list element per argument: unlike Runtime.exec(String), nothing is split on
+      // whitespace, so class paths and file names containing spaces survive intact.
+      ProcessBuilder builder = new ProcessBuilder("java", "-Xmx" + maxMem + "m", "-cp", cp,
+          "org.apache.joshua.zmert.MertCore", configFileName, stateFileName,
+          Integer.toString(iteration));
+
+      // Merge stderr into stdout. Draining the two pipes one after the other can deadlock
+      // when the child fills the pipe that is not currently being read.
+      builder.redirectErrorStream(true);
+      Process p = builder.start();
+
+      try (BufferedReader childOutput =
+          new BufferedReader(new InputStreamReader(p.getInputStream()))) {
+        String line;
+        while ((line = childOutput.readLine()) != null) {
+          System.out.println(line);
+        }
+      }
+      int status = p.waitFor();
+
+      if (status == STATUS_DONE) {
+        done = true;
+      } else if (status != STATUS_CONTINUE) {
+        System.out.println("Z-MERT exiting prematurely (MertCore returned " + status + ")...");
+        System.exit(status);
+      }
+    }
+  }
+
+  /**
+   * Prints the Z-MERT usage message to standard output.
+   *
+   * @param argsLen number of arguments the user supplied; only reported in the short form
+   * @param detailed {@code true} to list every config parameter with its default,
+   *        {@code false} for the short "wrong number of arguments" message
+   */
+  public static void printZMERTUsage(int argsLen, boolean detailed) {
+    if (!detailed) {
+      println("Oops, you provided " + argsLen + " args!");
+      println("");
+      printUsageSynopsis();
+      println("one per line.  Run   ZMERT -h   for more details on those parameters.");
+    } else {
+      printUsageSynopsis();
+      println("one per line.  Those parameters, and their default values, are:");
+      println("");
+      println("Relevant files:");
+      println("  -dir dirPrefix: working directory\n    [[default: null string (i.e. they are in the current directory)]]");
+      println("  -s sourceFile: source sentences (foreign sentences) of the MERT dataset\n    [[default: null string (i.e. file name is not needed by MERT)]]");
+      println("  -r refFile: target sentences (reference translations) of the MERT dataset\n    [[default: reference.txt]]");
+      println("  -rps refsPerSen: number of reference translations per sentence\n    [[default: 1]]");
+      println("  -txtNrm textNormMethod: how should text be normalized?\n       (0) don't normalize text,\n    or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n    or (2) apply 1 and also rejoin dashes between letters,\n    or (3) apply 1 and also drop non-ASCII characters,\n    or (4) apply 1+2+3\n    [[default: 1]]");
+      println("  -p paramsFile: file containing parameter names, initial values, and ranges\n    [[default: params.txt]]");
+      println("  -docInfo documentInfoFile: file informing Z-MERT which document each\n    sentence belongs to\n    [[default: null string (i.e. all sentences are in one 'document')]]");
+      println("  -fin finalLambda: file name for final lambda[] values\n    [[default: null string (i.e. no such file will be created)]]");
+      println("");
+      println("MERT specs:");
+      println("  -m metricName metric options: name of evaluation metric and its options\n    [[default: BLEU 4 closest]]");
+      println("  -maxIt maxMERTIts: maximum number of MERT iterations\n    [[default: 20]]");
+      println("  -prevIt prevMERTIts: maximum number of previous MERT iterations to\n    construct candidate sets from\n    [[default: 20]]");
+      println("  -minIt minMERTIts: number of iterations before considering an early exit\n    [[default: 5]]");
+      println("  -stopIt stopMinIts: some early stopping criterion must be satisfied in\n    stopMinIts *consecutive* iterations before an early exit\n    [[default: 3]]");
+      println("  -stopSig sigValue: early MERT exit if no weight changes by more than sigValue\n    [[default: -1 (i.e. this criterion is never investigated)]]");
+      println("  -thrCnt threadCount: number of threads to run in parallel when optimizing\n    [[default: 1]]");
+      println("  -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n    or both (3) or neither (0)\n    [[default: 3]]");
+      println("  -compress compressFiles: should Z-MERT compress the files it produces (1)\n    or not (0)\n    [[default: 0]]");
+      println("  -ipi initsPerIt: number of intermediate initial points per iteration\n    [[default: 20]]");
+      println("  -opi oncePerIt: modify a parameter only once per iteration (1) or not (0)\n    [[default: 0]]");
+      println("  -rand randInit: choose initial point randomly (1) or from paramsFile (0)\n    [[default: 0]]");
+      println("  -seed seed: seed used to initialize random number generator\n    [[default: time (i.e. value returned by System.currentTimeMillis())]]");
+      // println("  -ud useDisk: reliance on disk (0-2; higher value => more reliance)\n    [[default: 2]]");
+      println("");
+      println("Decoder specs:");
+      println("  -cmd commandFile: name of file containing commands to run the decoder\n    [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
+      println("  -passIt passIterationToDecoder: should iteration number be passed\n    to command file (1) or not (0)\n    [[default: 0]]");
+      println("  -decOut decoderOutFile: name of the output file produced by the decoder\n    [[default: output.nbest]]");
+      println("  -decExit validExit: value returned by decoder to indicate success\n    [[default: 0]]");
+      println("  -dcfg decConfigFile: name of decoder config file\n    [[default: dec_cfg.txt]]");
+      println("  -N N: size of N-best list (per sentence) generated in each MERT iteration\n    [[default: 100]]");
+      println("");
+      println("Output specs:");
+      println("  -v verbosity: Z-MERT verbosity level (0-2; higher value => more verbose)\n    [[default: 1]]");
+      println("  -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n    [[default: 0]]");
+      println("");
+    }
+  }
+
+  /** Prints the synopsis lines shared by the short and the detailed usage message. */
+  private static void printUsageSynopsis() {
+    println("Usage:");
+    println("           ZMERT -maxMem maxMemoryInMB MERT_configFile");
+    println("");
+    println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
+    println("allowed to use when performing its calculations (no memory is needed while");
+    println("the decoder is running),");
+    println("and the config file contains any subset of Z-MERT's 20-some parameters,");
+  }
+
+  /** Writes {@code obj} followed by a line break to standard output. */
+  private static void println(Object obj) {
+    System.out.println(obj);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
new file mode 100644
index 0000000..571b524
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides code for performing minimum error rate training.
+ * Much of the code in this package is based on Och (2003). 
+ * A deeper description of the algorithm is in Zaidan (2009).
+ */
+package org.apache.joshua.zmert;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/resources/log4j.properties b/joshua-core/src/main/resources/log4j.properties
new file mode 100644
index 0000000..acca5e9
--- /dev/null
+++ b/joshua-core/src/main/resources/log4j.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# log4j settings
+log4j.rootLogger=WARN, stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.err
+log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/overview.html
----------------------------------------------------------------------
diff --git a/joshua-core/src/overview.html b/joshua-core/src/overview.html
new file mode 100644
index 0000000..7efe5b3
--- /dev/null
+++ b/joshua-core/src/overview.html
@@ -0,0 +1,41 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head></head>
+<body bgcolor="white">
+
+<!--
+##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
+##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
+##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
+-->
+
+Apache Joshua is an extensible, open source statistical 
+hierarchical phrase-based machine translation system.
+
+<!--
+<h2>Related Documentation</h2>
+-->
+
+<!-- Put @see and @since tags down here. -->
+
+@see <a href="http://joshua.incubator.apache.org/">Joshua Website</a>
+
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
new file mode 100644
index 0000000..70fe67a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit test for iterating over a {@link Span}.
+ *
+ * @author Lane Schwartz
+ */
+public class SpanTest {
+
+  /**
+   * Checks that iterating over {@code new Span(1,10)} yields consecutive integers
+   * starting at 1, and that at least one value is actually produced.
+   */
+  @Test
+  public void iterator() {
+
+    Span span = new Span(1,10);
+
+    int expected = 1;
+    boolean iterationOccurred = false;
+
+    for (int actual : span) {
+      iterationOccurred = true;
+      Assert.assertEquals(actual, expected);
+      expected++;
+    }
+
+    // Without this guard an iterator that yields nothing would let the test pass vacuously.
+    Assert.assertTrue(iterationOccurred);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
new file mode 100644
index 0000000..e3042ed
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import static org.apache.joshua.util.FormatUtils.isNonterminal;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Exercises the static {@link Vocabulary} word/id mapping together with
+ * {@code FormatUtils.isNonterminal}. The vocabulary is cleared before and after each test.
+ */
+public class VocabularyTest {
+  private static final String FIRST_WORD = "word1";
+  private static final String SECOND_WORD = "word2";
+  private static final String X_SYMBOL = "[X]";
+  private static final String GOAL_SYMBOL = "[GOAL]";
+
+  /** Starts every test from a cleared vocabulary. */
+  @BeforeMethod
+  public void init() {
+    Vocabulary.clear();
+  }
+
+  /** Clears the vocabulary again once a test has finished. */
+  @AfterMethod
+  public void deinit() {
+    Vocabulary.clear();
+  }
+
+  /** A cleared vocabulary holds the unknown word and nothing else. */
+  @Test
+  public void givenVocabulary_whenEmpty_thenOnlyContainsUnknownWord() {
+    // the unknown word is present ...
+    assertTrue(Vocabulary.hasId(Vocabulary.UNKNOWN_ID));
+
+    // ... while neither the first terminal nor the first non-terminal slot is taken
+    assertFalse(Vocabulary.hasId(1));
+    assertFalse(Vocabulary.hasId(-1));
+
+    assertEquals(Vocabulary.UNKNOWN_WORD, Vocabulary.word(Vocabulary.UNKNOWN_ID));
+    assertEquals(1, Vocabulary.size());
+  }
+
+  /** Looking up a new word registers it under id 1; a repeated lookup returns the same id. */
+  @Test
+  public void givenVocabulary_whenNewWord_thenMappingIsAdded() {
+    final int firstId = 1;
+
+    assertFalse(Vocabulary.hasId(firstId));
+
+    // the first lookup creates the mapping, the second one must reuse it
+    assertEquals(firstId, Vocabulary.id(FIRST_WORD));
+    assertEquals(firstId, Vocabulary.id(FIRST_WORD));
+
+    assertTrue(Vocabulary.hasId(firstId));
+    assertEquals(FIRST_WORD, Vocabulary.word(firstId));
+    assertEquals(2, Vocabulary.size());
+  }
+
+  /** Bracketed symbols and negative ids are non-terminals; everything else is a terminal. */
+  @Test
+  public void givenVocabulary_whenCheckingStringInBracketsOrNegativeNumber_thenIsNonTerminal() {
+    // a bracketed symbol is a non-terminal
+    assertTrue(isNonterminal(X_SYMBOL));
+
+    // plain words, empty or unbalanced brackets and the empty string are terminals
+    for (String terminal : new String[] {FIRST_WORD, "[]", "[", "]", ""}) {
+      assertFalse(isNonterminal(terminal));
+    }
+
+    // negative ids denote non-terminals
+    for (int nonTerminalId : new int[] {-1, -5}) {
+      assertTrue(isNonterminal(nonTerminalId));
+    }
+
+    // zero and positive ids denote terminals
+    for (int terminalId : new int[] {0, 5}) {
+      assertFalse(isNonterminal(terminalId));
+    }
+  }
+
+  /**
+   * Non-terminals receive negative ids and terminals positive ones; {@code hasId} answers
+   * true for both signs of a non-terminal's index.
+   */
+  @Test
+  public void givenVocabulary_whenNonTerminal_thenReturnsStrictlyPositiveNonTerminalIndices() {
+    final int firstIndex = 1;
+    final int secondIndex = 4;
+
+    // first entry: a non-terminal
+    assertTrue(Vocabulary.id(X_SYMBOL) < 0);
+    assertTrue(Vocabulary.hasId(firstIndex));
+    assertTrue(Vocabulary.hasId(-firstIndex));
+
+    // second and third entry: terminals
+    assertTrue(Vocabulary.id("") > 0);
+    assertTrue(Vocabulary.id(FIRST_WORD) > 0);
+
+    // fourth entry: another non-terminal
+    assertTrue(Vocabulary.id(GOAL_SYMBOL) < 0);
+    assertTrue(Vocabulary.hasId(secondIndex));
+    assertTrue(Vocabulary.hasId(-secondIndex));
+
+    assertTrue(Vocabulary.id(SECOND_WORD) > 0);
+  }
+
+  /** Writing the vocabulary to a file and reading it back restores the same ids. */
+  @Test
+  public void givenVocabulary_whenWritenAndReading_thenVocabularyStaysTheSame() throws IOException {
+    File dump = File.createTempFile("vocab", "tmp");
+    dump.deleteOnExit();
+
+    // ids are handed out in lookup order, so the order of these three calls matters
+    int firstWordId = Vocabulary.id(FIRST_WORD);
+    int symbolId = Vocabulary.id(X_SYMBOL);
+    int secondWordId = Vocabulary.id(SECOND_WORD);
+
+    Vocabulary.write(dump.getAbsolutePath());
+    Vocabulary.clear();
+    Vocabulary.read(dump);
+
+    // unknown word + 3 other words
+    assertEquals(4, Vocabulary.size());
+
+    assertTrue(Vocabulary.hasId(firstWordId));
+    assertTrue(Vocabulary.hasId(symbolId));
+    assertTrue(Vocabulary.hasId(secondWordId));
+
+    assertEquals(firstWordId, Vocabulary.id(FIRST_WORD));
+    assertEquals(symbolId, Vocabulary.id(X_SYMBOL));
+    assertEquals(secondWordId, Vocabulary.id(SECOND_WORD));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
new file mode 100644
index 0000000..439e486
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.lm.buildin_lm.TrieLM;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for testing ARPA language model class.
+ *
+ * @author Lane Schwartz
+ */
+public class ArpaFileTest {
+
+  /** Absolute path of the temporary ARPA file written by {@link #setup()}. */
+  String arpaFileName;
+
+  /** Vocabulary that {@link #setup()} fills with the eight words of the test model. */
+  Vocabulary vocab;
+
+  /**
+   * Registers the test words and writes a small trigram model in ARPA format to a
+   * temporary file. The other tests depend on this method through
+   * {@code dependsOnMethods}.
+   */
+  @Test
+  public void setup() {
+
+    vocab = new Vocabulary();
+    vocab.id("a");
+    vocab.id("because");
+    vocab.id("boycott");
+    vocab.id("of");
+    vocab.id("parliament");
+    vocab.id("potato");
+    vocab.id("resumption");
+    vocab.id("the");
+
+    try {
+      File file = File.createTempFile("testLM", "arpa");
+      // Do not leave the model behind in the temp directory after the test run.
+      file.deleteOnExit();
+
+      // try-with-resources closes the stream even if writing fails part-way.
+      try (PrintStream out = new PrintStream(file, "UTF-8")) {
+
+        out.println();
+        out.println("\\data\\");
+        out.println("ngram 1=8");
+        // NOTE(review): the header announces 4 bigrams, but five are written below and
+        // testIteration/testSize expect five - confirm whether the mismatch is intended.
+        out.println("ngram 2=4");
+        out.println("ngram 3=1");
+        out.println();
+
+        // Fields are tab-separated: log probability, n-gram, optional backoff weight.
+        out.println("\\1-grams:");
+        out.println("-1.992672\ta\t-0.1195484");
+        out.println("-2.713723\tbecause\t-0.4665429");
+        out.println("-4.678545\tboycott\t-0.0902521");
+        out.println("-1.609573\tof\t-0.1991907");
+        out.println("-3.875917\tparliament\t-0.1274891");
+        out.println("-9.753210\tpotato");
+        out.println("-4.678545\tresumption\t-0.07945678");
+        out.println("-1.712444\tthe\t-0.1606644");
+
+        out.println();
+        out.println("\\2-grams:");
+        out.println("-0.3552987\tbecause of\t-0.03083654");
+        out.println("-1.403534\tof a");
+        out.println("-0.7507797\tof the\t-0.05237135");
+        out.println("-0.7266324\tresumption of");
+        out.println("-3.936147\tthe resumption");
+
+        out.println();
+        out.println("\\3-grams:");
+        out.println("-0.6309999\tbecause of the");
+        out.println();
+
+        out.println("\\end\\");
+      }
+
+      this.arpaFileName = file.getAbsolutePath();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to create temporary file: " + e.toString());
+    }
+
+  }
+
+  /** The model written by {@link #setup()} contains trigrams, so its order must be 3. */
+  @Test(dependsOnMethods = { "setup" })
+  public void testOrder() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    try {
+      Assert.assertEquals(arpaFile.getOrder(), 3);
+    } catch (FileNotFoundException e) {
+      Assert.fail(e.toString());
+    }
+  }
+
+  /** Iterating over the file must yield 8 unigrams, 5 bigrams and 1 trigram. */
+  @Test(dependsOnMethods = { "setup" })
+  public void testIteration() {
+
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    // n-gram order -> number of n-grams of that order seen so far
+    Map<Integer, Integer> counts = new HashMap<>();
+
+    boolean iterationOccurred = false;
+
+    for (ArpaNgram ngram : arpaFile) {
+
+      iterationOccurred = true;
+
+      int order = ngram.order();
+      Integer seen = counts.get(order);
+      counts.put(order, seen == null ? 1 : seen + 1);
+
+    }
+
+    Assert.assertTrue(iterationOccurred);
+
+    Assert.assertTrue(counts.containsKey(1));
+    Assert.assertTrue(counts.containsKey(2));
+    Assert.assertTrue(counts.containsKey(3));
+
+    Assert.assertEquals((int) counts.get(1), 8);
+    Assert.assertEquals((int) counts.get(2), 5);
+    Assert.assertEquals((int) counts.get(3), 1);
+
+  }
+
+  /** The reported size must equal the total number of n-grams written (8 + 5 + 1). */
+  @Test(dependsOnMethods = { "setup" })
+  public void testSize() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    Assert.assertEquals(arpaFile.size(), 14);
+  }
+
+  /** A {@code TrieLM} built from the file must have at least one child. */
+  @Test(dependsOnMethods = { "setup", "testIteration" })
+  public void testChildren() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+    // Compare by value: assertNotSame on autoboxed ints checks object identity instead.
+    Assert.assertTrue(lm.getChildren().size() != 0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
new file mode 100644
index 0000000..5e71352
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.joshua.decoder.ff.lm;
+
+import static org.hamcrest.CoreMatchers.*;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class LanguageModelFFTest {
+
+  private static final float WEIGHT = 0.5f;
+
+  private LanguageModelFF ff;
+
+  @BeforeMethod
+  public void setUp() {
+    Decoder.resetGlobalState();
+
+    FeatureVector weights = new FeatureVector();
+    weights.set("lm_0", WEIGHT);
+    String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
+
+    JoshuaConfiguration config = new JoshuaConfiguration();
+    ff = new LanguageModelFF(weights, args, config);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+    Decoder.resetGlobalState();
+  }
+
+  @Test
+  public void givenNonStartSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int[] left = {3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score =  ff.getLM().sentenceLogProbability(left, 2, 1);
+    assertEquals(-99.0f, score, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(score * WEIGHT, cost, 0.0f);
+  }
+
+  @Test
+  public void givenOnlyStartSymbol_whenEstimateFutureCost_thenZeroResult() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    int[] left = {startSymbolId};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.getLM().sentenceLogProbability(left, 2, 2);
+    assertEquals(0.0f, score, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(cost, score * WEIGHT, 0.0f);
+  }
+
+  @Test
+  public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    assertThat(startSymbolId, not(equalTo(3)));
+    int[] left = {startSymbolId, 3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.getLM().sentenceLogProbability(left, 2, 2);
+    assertEquals(score, -100.752754f, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(cost, score * WEIGHT, 0.0f);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
new file mode 100644
index 0000000..7752785
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.assertFalse;
+
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Checks sentence-level scoring and out-of-vocabulary detection of {@link LMGrammarBerkeley}
+ * using the language model file {@code resources/berkeley_lm/lm}.
+ */
+public class LMBerkeleySentenceProbablityTest {
+
+  /**
+   * Compares {@code sentenceLogProbability} for the id sequence {0, 2, 3, 0} with the sum of the
+   * individual {@code getLogProb} lookups made directly on the underlying Berkeley LM.
+   */
+  @Test
+  public void verifySentenceLogProbability() {
+    LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+    grammar.registerWord("the", 2);
+    grammar.registerWord("chat-rooms", 3);
+    grammar.registerWord("<unk>", 0);
+
+    ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
+    float expected =
+        lm.getLogProb(new int[] {}, 0, 0)
+        + lm.getLogProb(new int[] {0}, 0, 1)
+        + lm.getLogProb(new int[] {0, 2}, 0, 2)
+        + lm.getLogProb(new int[] {2, 3}, 0, 2)
+        + lm.getLogProb(new int[] {3, 0}, 0, 2);
+
+    float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
+    // TestNG's assertEquals takes (actual, expected, delta); keeping that order makes a
+    // failure message report the two values under the right labels.
+    assertEquals(result, expected, 0.0);
+  }
+
+  /**
+   * Asserts that the id of {@code "UNKNOWN_WORD"} is reported as out-of-vocabulary while the id
+   * of {@code "chat-rooms"} is not.
+   */
+  @Test
+  public void givenUnknownWord_whenIsOov_thenCorrectlyDetected() {
+    LMGrammarBerkeley lm = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+    assertTrue(lm.isOov(Vocabulary.id("UNKNOWN_WORD")));
+    assertFalse(lm.isOov(Vocabulary.id("chat-rooms")));
+  }
+
+  /** Clears the shared {@link Vocabulary} after every test method. */
+  @AfterMethod
+  public void tearDown() {
+    Vocabulary.clear();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
new file mode 100644
index 0000000..b0612d4
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Replacement for test/lm/berkeley/test.sh regression test.
+ * <p>
+ * Decodes a fixed input with a BerkeleyLM language model feature and compares the formatted
+ * feature output against fixed expected strings.
+ */
+public class LMGrammarBerkeleyTest {
+
+  private static final String INPUT = "the chat-rooms";
+  private static final String EXPECTED_OUTPUT = "tm_glue_0=2.000 lm_0=-7.153\n";
+  private static final String EXPECTED_OUTPUT_WITH_OOV = "tm_glue_0=2.000 lm_0=-7.153 lm_0_oov=0.000\n";
+  private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  /**
+   * Supplies the language model files that {@link #verifyLM(String)} is run against.
+   *
+   * @return one single-element row per language model file path
+   */
+  @DataProvider(name = "languageModelFiles")
+  public Object[][] lmFiles() {
+    return new Object[][]{{"resources/berkeley_lm/lm"},
+            {"resources/berkeley_lm/lm.gz"},
+            {"resources/berkeley_lm/lm.berkeleylm"},
+            {"resources/berkeley_lm/lm.berkeleylm.gz"}};
+  }
+
+  /**
+   * Releases the decoder created by the test that just ran.
+   *
+   * @throws Exception declared by the original signature; kept for compatibility
+   */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    // The decoder is still null when its construction failed in the test; calling cleanUp()
+    // blindly would raise a NullPointerException here and obscure the original failure.
+    if (decoder != null) {
+      decoder.cleanUp();
+      // Drop the reference so that a later failed construction cannot clean this instance twice.
+      decoder = null;
+    }
+  }
+
+  /**
+   * Decodes {@link #INPUT} with the given language model file and checks the feature output.
+   *
+   * @param lmFile path of the language model file passed via {@code -lm_file}
+   */
+  @Test(dataProvider = "languageModelFiles")
+  public void verifyLM(String lmFile) {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.processCommandLineOptions(OPTIONS);
+    joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file " + lmFile);
+    decoder = new Decoder(joshuaConfig, null);
+    final String translation = decode(INPUT).toString();
+    assertEquals(translation, EXPECTED_OUTPUT);
+  }
+
+  /**
+   * Wraps the input in a {@link Sentence} with id 0 and decodes it with the current decoder.
+   *
+   * @param input the source text to decode
+   * @return the translation produced by the decoder
+   */
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+  /**
+   * Same decode as {@link #verifyLM(String)} but with {@code -oov_feature} enabled: expects three
+   * dense features and an additional {@code lm_0_oov} entry in the output.
+   */
+  @Test
+  public void givenLmWithOovFeature_whenDecoder_thenCorrectFeaturesReturned() {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.processCommandLineOptions(OPTIONS);
+    joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file resources/berkeley_lm/lm");
+    decoder = new Decoder(joshuaConfig, null);
+    final String translation = decode(INPUT).toString();
+    assertEquals(Decoder.weights.getDenseFeatures().size(), 3);
+    assertEquals(translation, EXPECTED_OUTPUT_WITH_OOV);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
new file mode 100644
index 0000000..5946abd
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.class_lm;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Exercises {@link LanguageModelFF} set up as a class-based language model.
+ * This unit test relies on KenLM.  If the KenLM library is not found when the test is run all
+ * tests will be skipped.
+ */
+public class ClassBasedLanguageModelTest {
+
+  private static final float WEIGHT = 0.5f;
+
+  private LanguageModelFF ff;
+
+  /** Resets decoder-wide state and then constructs the feature function under test. */
+  @BeforeMethod
+  public void setUp() {
+    Decoder.resetGlobalState();
+
+    final FeatureVector lmWeights = new FeatureVector();
+    lmWeights.set("lm_0", WEIGHT);
+
+    final String[] lmArgs = {
+      "-lm_type", "kenlm",
+      "-lm_order", "9",
+      "-lm_file", "./src/test/resources/lm/class_lm/class_lm_9gram.gz",
+      "-class_map", "./src/test/resources/lm/class_lm/class.map"
+    };
+    final JoshuaConfiguration configuration = new JoshuaConfiguration();
+
+    // Construction goes through the KenLM guard rather than being invoked directly.
+    KenLmTestUtil.Guard(() -> ff = new LanguageModelFF(lmWeights, lmArgs, configuration));
+  }
+
+  /** Resets decoder-wide state again once the test method has finished. */
+  @AfterMethod
+  public void tearDown() {
+    Decoder.resetGlobalState();
+  }
+
+  /** The configured feature function must report itself as a class LM and as stateful. */
+  @Test
+  public void givenLmDefinition_whenInitialized_thenInitializationIsCorrect() {
+    assertTrue(ff.isClassLM());
+    assertTrue(ff.isStateful());
+  }
+
+  /** A rule whose target is the single word "professionalism" must map to the class "13". */
+  @Test
+  public void givenRuleWithSingleWord_whenGetRuleId_thenIsMappedToClass() {
+    final int[] targetIds = Vocabulary.addAll(new String[] { "professionalism" });
+    final Rule singleWordRule = new Rule(
+        0,
+        null,
+        targetIds,
+        new FeatureVector(),
+        0,
+        OwnerMap.register(OwnerMap.UNKNOWN_OWNER));
+
+    final int[] ruleIds = ff.getRuleIds(singleWordRule);
+    assertEquals(Vocabulary.word(ruleIds[0]), "13");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
new file mode 100644
index 0000000..5d37a05
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.class_lm;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.ff.lm.ClassMap;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+
+/** Reads the class map test resource and checks its size and two word-to-class lookups. */
+public class ClassMapTest {
+
+  private static final int EXPECTED_CLASS_MAP_SIZE = 5140;
+
+  /** Resets decoder-wide state before each test method. */
+  @BeforeMethod
+  public void setUp() {
+    Decoder.resetGlobalState();
+  }
+
+  /** Resets decoder-wide state after each test method. */
+  @AfterMethod
+  public void tearDown() {
+    Decoder.resetGlobalState();
+  }
+
+  /** Loads the class map file and verifies the entry count plus two known mappings. */
+  @Test
+  public void givenClassMapFile_whenClassMapRead_thenEntriesAreRead() {
+    final ClassMap loaded = new ClassMap("./src/test/resources/lm/class_lm/class.map");
+
+    assertEquals(loaded.size(), EXPECTED_CLASS_MAP_SIZE);
+    assertEquals(classNameOf(loaded, "professionalism"), "13");
+    assertEquals(classNameOf(loaded, "convenience"), "0");
+  }
+
+  /**
+   * Looks up the class of a word and returns that class as a vocabulary string.
+   *
+   * @param classMap the map to query
+   * @param word the surface form whose class is wanted
+   * @return the vocabulary word for the class id the map returns
+   */
+  private static String classNameOf(ClassMap classMap, String word) {
+    final int wordId = Vocabulary.id(word);
+    final int classId = classMap.getClassID(wordId);
+    return Vocabulary.word(classId);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
new file mode 100644
index 0000000..8d129e1
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
@@ -0,0 +1,39 @@
+package org.apache.joshua.decoder.ff.tm;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+/** Tests registration and lookup of owners in {@link OwnerMap}. */
+public class OwnerMapTest {
+
+  /**
+   * Clears the owner map before each test method.
+   *
+   * @throws Exception declared by the original signature; kept for compatibility
+   */
+  @BeforeMethod
+  public void setUp() throws Exception {
+    OwnerMap.clear();
+  }
+
+  /**
+   * Clears the owner map after each test method.
+   *
+   * @throws Exception declared by the original signature; kept for compatibility
+   */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    OwnerMap.clear();
+  }
+
+  /** Looking up an id that was never registered must throw IllegalArgumentException. */
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void given_invalidId_thenThrowsException() {
+    OwnerMap.getOwner(new OwnerId(3));
+  }
+
+  /** Registering the same owner twice yields equal ids, and the id maps back to the owner. */
+  @Test
+  public void givenOwner_whenRegisteringOwner_thenMappingIsCorrect() {
+    // GIVEN
+    String owner = "owner";
+
+    // WHEN
+    OwnerId id = OwnerMap.register(owner);
+    OwnerId id2 = OwnerMap.register(owner);
+
+    // THEN
+    // TestNG's assertEquals takes (actual, expected): the repeated registration is checked
+    // against the first one, and the looked-up owner against the registered string.
+    assertEquals(id2, id);
+    assertEquals(OwnerMap.getOwner(id), owner);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
new file mode 100644
index 0000000..88b2350
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for the static text de-normalization helpers of {@link DeNormalize}: each test feeds a
+ * tokenized, lower-cased string to one helper and compares the result with a fixed expected string.
+ */
+public class DeNormalizeTest {
+
+  // Shared input fixture; reset in setUp() and overwritten by several tests.
+  private String tokenized;
+
+  /**
+   * Resets {@link #tokenized} to the default tokenized sentence.
+   *
+   * @throws java.lang.Exception declared by the signature; nothing in this body throws
+   */
+  @BeforeMethod
+  protected void setUp() throws Exception {
+    tokenized = "my son 's friend , however , plays a high - risk game .";
+  }
+
+  /**
+   * Test method for {@link DeNormalize#processSingleLine(String)}: a full sentence with
+   * contractions, bracket tokens, title abbreviations and a hyphenated word.
+   */
+  @Test(enabled = true)
+  public void testProcessSingleLine() {
+    tokenized =
+        "my son 's friend , ( dr . -rrb- robotnik , phd , however , wo n't play a high - risk game .";
+    String expected = "My son's friend, (Dr.) robotnik, PhD, however, won't play a high-risk game.";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#processSingleLine(String)}: two abbreviations in a row.
+   */
+  @Test
+  public void testProcessSingleLine_interspersed() {
+    tokenized = "phd mphil";
+    String expected = "PhD MPhil";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeLineFirstLetter(String)}: default sentence.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter(tokenized);
+    String expected = "My son 's friend , however , plays a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeLineFirstLetter(String)}: empty input stays
+   * empty.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_empty() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeLineFirstLetter(String)}: a single digit is
+   * returned unchanged.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleNumberCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("1");
+    String expected = "1";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeLineFirstLetter(String)}: a single letter is
+   * upper-cased.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleLetterCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("a");
+    String expected = "A";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinPunctuationMarks(String)}: default sentence.
+   */
+  @Test
+  public void testJoinPunctuationMarks() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks(tokenized);
+    String expected = "my son 's friend, however, plays a high - risk game.";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinPunctuationMarks(String)}: empty input stays empty.
+   */
+  @Test
+  public void testJoinPunctuationMarks_empty() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: default sentence.
+   */
+  @Test
+  public void testJoinHyphen() throws Exception {
+    String actual = DeNormalize.joinHyphen(tokenized);
+    String expected = "my son 's friend , however , plays a high-risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: empty input stays empty.
+   */
+  @Test
+  public void testJoinHypen_empty() throws Exception {
+    String actual = DeNormalize.joinHyphen("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: two hyphens separated by a single
+   * space; the expected output keeps one space before the trailing word.
+   */
+  @Test
+  public void testJoinHyphen_1space_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a - - b");
+    String expected = "a-- b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: two hyphens separated by two
+   * spaces; the expected output contains no spaces at all.
+   */
+  @Test
+  public void testJoinHyphen_2spaces_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a -  - b");
+    String expected = "a--b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinContractions(String)}: possessive and negation.
+   */
+  @Test
+  public void testJoinContractions() throws Exception {
+    tokenized = "my son 's friend , however , wo n't play a high - risk game .";
+    String actual = DeNormalize.joinContractions(tokenized);
+    String expected = "my son's friend , however , won't play a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinContractions(String)}: empty input stays empty.
+   */
+  @Test
+  public void testJoinContractions_empty() throws Exception {
+    String actual = DeNormalize.joinContractions("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeNameTitleAbbrvs(String)}: abbreviations inside
+   * a sentence, then a bare list of titles.
+   */
+  @Test
+  public void testCapitalizeNameTitleAbbrvs() throws Exception {
+    String actual, expected;
+    tokenized =
+        "my son 's friend , dr . robotnik , phd , however , wo n't play a high - risk game .";
+    expected =
+        "my son 's friend , Dr . robotnik , PhD , however , wo n't play a high - risk game .";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "mr mrs ms miss dr prof";
+    expected = "Mr Mrs Ms Miss Dr Prof";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeI(String)}: only a stand-alone "i" is expected
+   * to be capitalized; an "i" attached to a neighbouring word is left alone.
+   */
+  @Test
+  public void testCapitalizeI() throws Exception {
+    String expected, actual;
+
+    tokenized = "sam i am";
+    expected = "sam I am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sam iam";
+    expected = "sam iam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sami am";
+    expected = "sami am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "samiam";
+    expected = "samiam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#replaceBracketTokens(String)}: lower-case and upper-case
+   * bracket tokens are both expected to become parentheses.
+   */
+  @Test
+  public void testReplaceBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "-lrb- i -rrb-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "-LRB- i -RRB-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Checks that brackets are attached to the text they enclose. Note that this test exercises
+   * {@link DeNormalize#joinPunctuationMarks(String)}, not a dedicated bracket method.
+   */
+  @Test
+  public void testDetokenizeBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "( i )";
+    expected = "(i)";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "[ i } j";
+    expected = "[i} j";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
new file mode 100644
index 0000000..a09aebb
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import static org.mockito.Mockito.mock;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
+
+/**
+ * This class verifies the following behaviors:
+ * <p>
+ * - A stream that has only been constructed reports a size of 0.
+ * <p>
+ * - A non-blank input that is not followed by a newline, e.g. "1", causes a translation to be
+ * created.
+ * <p>
+ * - An input that contains whitespace or nothing followed by a newline causes a translation to be
+ * created, with "" as the source.
+ */
+public class TranslationRequestStreamTest {
+
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+
+  /** Placeholder lifecycle hook; intentionally empty. */
+  @BeforeMethod
+  public void createTranslationRequest() throws Exception {
+  }
+
+  /** Placeholder lifecycle hook; intentionally empty. */
+  @BeforeMethod
+  protected void setUp() {
+  }
+
+  /** Placeholder lifecycle hook; intentionally empty. */
+  @AfterMethod
+  protected void tearDown() throws Exception {
+  }
+
+  /**
+   * Wraps the given byte stream in a reader using the default charset and builds the request
+   * stream under test on top of it.
+   */
+  private TranslationRequestStream streamOver(InputStream bytes) {
+    final InputStreamReader decoded = new InputStreamReader(bytes, Charset.defaultCharset());
+    return new TranslationRequestStream(new BufferedReader(decoded), joshuaConfiguration);
+  }
+
+  /** Builds the request stream under test over the bytes of the given text. */
+  private TranslationRequestStream streamOver(String text) {
+    return streamOver(new ByteArrayInputStream(text.getBytes()));
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#TranslationRequestStream(BufferedReader, JoshuaConfiguration)}.
+   */
+  @Test(enabled = false)
+  public void testTranslationRequest() {
+    fail("Not yet implemented");
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}:
+   * a freshly constructed stream has size 0.
+   */
+  @Test
+  public void testSize_uponConstruction() {
+    final TranslationRequestStream stream = streamOver(mock(InputStream.class));
+    assertEquals(stream.size(), 0);
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}:
+   * one read of the input "1" gives size 1.
+   *
+   * @throws Exception if the stream fails
+   */
+  @Test
+  public void testSize_1() throws Exception {
+    final TranslationRequestStream stream = streamOver("1");
+    stream.next();
+    assertEquals(stream.size(), 1);
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}:
+   * one read of a lone newline gives size 1.
+   *
+   * @throws Exception if the stream fails
+   */
+  @Test
+  public void testSize_newline() throws Exception {
+    final TranslationRequestStream stream = streamOver("\n");
+    stream.next();
+    assertEquals(stream.size(), 1);
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}:
+   * two reads of two newlines give size 2.
+   *
+   * @throws Exception if the stream fails
+   */
+  @Test
+  public void testSize_2newlines() throws Exception {
+    final TranslationRequestStream stream = streamOver("\n\n");
+    stream.next();
+    stream.next();
+    assertEquals(stream.size(), 2);
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#next()}:
+   * each of two newlines yields a request whose source is the empty string.
+   *
+   * @throws Exception if the stream fails
+   */
+  @Test
+  public void testNext_2Newlines() throws Exception {
+    final TranslationRequestStream stream = streamOver("\n\n");
+    assertEquals(stream.next().source(), "");
+    assertEquals(stream.next().source(), "");
+  }
+
+  /**
+   * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#remove()}.
+   */
+  @Test(enabled = false)
+  public void testRemove() {
+    fail("Not yet implemented");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
new file mode 100644
index 0000000..5b9db06
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.kbest_extraction;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Reimplements the kbest extraction regression test: decodes a fixed input with the
+ * configuration in {@code resources/kbest_extraction/joshua.config} and compares the output with
+ * the gold file.
+ * TODO (fhieber): this test strangely only works with StateMinimizing KenLM.
+ * This is to be investigated
+ */
+public class KBestExtractionTest {
+
+  private static final String CONFIG = "resources/kbest_extraction/joshua.config";
+  private static final String INPUT = "a b c d e";
+  private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  /**
+   * Reads the configuration, overrides the output format, and builds the decoder through the
+   * KenLM guard.
+   *
+   * @throws Exception if the configuration cannot be read or the decoder cannot be built
+   */
+  @BeforeMethod
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    joshuaConfig.outputFormat = "%i ||| %s ||| %c";
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  /**
+   * Releases the decoder and drops the reference to it.
+   *
+   * @throws Exception declared by the original signature; kept for compatibility
+   */
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  /**
+   * Decodes {@link #INPUT} and expects the output to equal the content of the gold file.
+   *
+   * @throws IOException if the gold file cannot be read
+   */
+  @Test
+  public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    // TestNG's assertEquals takes (actual, expected); the gold file is the expected value.
+    assertEquals(translation, gold);
+  }
+
+  /**
+   * Wraps the input in a {@link Sentence} with id 0 and decodes it with the current decoder.
+   *
+   * @param input the source text to decode
+   * @return the translation produced by the decoder
+   */
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
new file mode 100644
index 0000000..c6512da
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.phrase;
+
+import org.testng.annotations.Test;
+
+import java.util.BitSet;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.assertFalse;
+
+public class CoverageTest {
+
+  /** Sets four spans out of order, then checks compatible() and the toString() rendering. */
+  @Test
+  public void testSet() {
+    final Coverage coverage = new Coverage();
+    coverage.set(1, 2);
+    coverage.set(3, 4);
+    coverage.set(2, 3);
+    coverage.set(0, 1);
+
+    assertFalse(coverage.compatible(0, 1));
+    assertFalse(coverage.compatible(0, 5));
+    assertTrue(coverage.compatible(4, 6));
+    assertEquals(coverage.toString(), "4 ..........");
+  }
+
+  /** Expects pattern(4, 5) to equal a BitSet with only bit 0 set after marking (5,6) and (0,4). */
+  @Test
+  public void testPattern() {
+    final Coverage coverage = new Coverage();
+    coverage.set(5, 6);
+    coverage.set(0, 4);
+    final BitSet expected = new BitSet();
+    expected.set(0);
+    assertEquals(coverage.pattern(4, 5), expected);
+  }
+
+  /** Marks an extra span on a copy and expects its toString() to differ from the source's. */
+  @Test
+  public void testCopyConstructor() {
+    final Coverage source = new Coverage();
+    source.set(2, 3);
+    final Coverage copy = new Coverage(source);
+    copy.set(4, 5);
+    assertFalse(source.toString().equals(copy.toString()));
+  }
+
+  /** Checks compatible() around the single span (10,14), then again once (0,9) is marked too. */
+  @Test
+  public void testCompatible() {
+    final Coverage coverage = new Coverage();
+    coverage.set(10, 14);
+    assertTrue(coverage.compatible(14, 16));
+    assertTrue(coverage.compatible(6, 10));
+    assertTrue(coverage.compatible(1, 10));
+    assertTrue(coverage.compatible(1, 9));
+    assertFalse(coverage.compatible(9, 11));
+    assertFalse(coverage.compatible(13, 15));
+    assertFalse(coverage.compatible(9, 15));
+    assertFalse(coverage.compatible(9, 14));
+    assertFalse(coverage.compatible(10, 15));
+
+    coverage.set(0, 9);
+    // Of the probed spans, only (9,10) and those starting at 14 or later should be compatible now.
+    for (int width = 1; width <= 3; width++) {
+      for (int start = 0; start < 20; start++) {
+        final int end = start + width;
+        final boolean expectCompatible = (start == 9 && end == 10) || start >= 14;
+        if (expectCompatible) {
+          assertTrue(coverage.compatible(start, end));
+        } else {
+          assertFalse(coverage.compatible(start, end));
+        }
+      }
+    }
+  }
+
+  /** Marks spans one at a time and checks firstZero() after each step. */
+  @Test
+  public void testFirstZero() {
+    // Each row is {from, to, firstZero() expected right after marking (from, to)}.
+    final int[][] steps = {{2, 5, 0}, {8, 10, 0}, {0, 2, 5}, {5, 7, 7}, {7, 8, 10}};
+    final Coverage coverage = new Coverage();
+    for (final int[] step : steps) {
+      coverage.set(step[0], step[1]);
+      assertEquals(coverage.firstZero(), step[2]);
+    }
+  }
+
+  /** With (0,2) and (8,10) marked, probes leftOpening/rightOpening at positions 2 through 6. */
+  @Test
+  public void testOpenings() {
+    final Coverage coverage = new Coverage();
+    coverage.set(0, 2);
+    coverage.set(8, 10);
+
+    for (int position = 2; position < 7; position++) {
+      assertEquals(coverage.leftOpening(position), 2);
+      assertEquals(coverage.rightOpening(position, 17), 8);
+      assertEquals(coverage.rightOpening(position, 7), 7);
+    }
+  }
+
+  /** Expects a span marked in one call to equal the same span marked as two adjacent pieces. */
+  @Test
+  public void testEquals() {
+    final Coverage whole = new Coverage();
+    whole.set(9, 11);
+    final Coverage pieces = new Coverage();
+    pieces.set(9, 10);
+    pieces.set(10, 11);
+    assertEquals(whole, pieces);
+  }
+
+  /** Checks the toString() rendering after marking (0,40) and (44,49). */
+  @Test
+  public void testToString() {
+    final Coverage coverage = new Coverage();
+    coverage.set(0, 40);
+    coverage.set(44, 49);
+    assertEquals(coverage.toString(), "40 ....xxxxx.");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
new file mode 100644
index 0000000..8a68ab7
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.constrained;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Reimplements the constrained phrase decoding test: decodes a "source ||| target"
+ * input and compares the rendered translation with the contents of the gold file.
+ */
+public class ConstrainedPhraseDecodingTest {
+  private static final String CONFIG = "resources/phrase_decoder/constrained.config";
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  /** Reads CONFIG and creates a fresh decoder via KenLmTestUtil.Guard before each test method. */
+  @BeforeMethod
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  /** Currently disabled; the rendered translation must equal the gold file read as UTF-8. */
+  @Test(enabled = false)
+  public void givenInput_whenConstrainedPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(translation, gold); // TestNG argument order is (actual, expected)
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
new file mode 100644
index 0000000..f2fc6a7
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.decode;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Reimplements the phrase decoding test: decodes a source sentence and compares with the gold file.
+ */
+public class PhraseDecodingTest {
+  private static final String CONFIG = "resources/phrase_decoder/config";
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  /** Reads CONFIG and creates a fresh decoder via KenLmTestUtil.Guard before each test method. */
+  @BeforeMethod
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  /** Currently disabled; the rendered translation must equal the gold file read as UTF-8. */
+  @Test(enabled = false)
+  public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(translation, gold); // TestNG argument order is (actual, expected)
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
new file mode 100644
index 0000000..3b2852c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.testng.annotations.Test;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+/** Tests {@link Sentence} on a source side that is joshuaConfiguration.maxlen characters long. */
+public class AlmostTooLongSentenceTest {
+  private JoshuaConfiguration joshuaConfiguration;
+  private String almostTooLongInput;
+  private Sentence sentencePlusTarget;
+
+  @BeforeMethod
+  public void setUp() {
+    joshuaConfiguration = new JoshuaConfiguration();
+    almostTooLongInput = concatStrings(".", joshuaConfiguration.maxlen);
+    sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0, joshuaConfiguration);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    assertNotNull(new Sentence("", 0, joshuaConfiguration));
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Return a string consisting of repeatedToken concatenated repeatedTimes times.
+   *
+   * @param repeatedToken the text to repeat
+   * @param repeatedTimes how many copies to concatenate; values below 1 yield ""
+   * @return the concatenation, with no separator between the copies
+   */
+  private String concatStrings(String repeatedToken, int repeatedTimes) {
+    final StringBuilder result = new StringBuilder();
+    for (int i = 0; i < repeatedTimes; i++) {
+      result.append(repeatedToken);
+    }
+    return result.toString();
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyNotEmpty() {
+    assertFalse(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyTargetNull() {
+    assertNull(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).target);
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetIsNotEmpty() {
+    assertFalse(this.sentencePlusTarget.isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetNull() {
+    // Despite the "TargetNull" suffix, the target side is expected to be kept as given.
+    assertEquals(this.sentencePlusTarget.target, "target side");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
new file mode 100644
index 0000000..8e0d171
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+/** Tests {@link Sentence} construction, emptiness checks and handling of over-long input. */
+public class SentenceTest {
+  private String tooLongInput;
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+
+  @BeforeMethod
+  public void setUp() {
+    tooLongInput = concatTokens("*", joshuaConfiguration.maxlen * 2);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Return a string consisting of repeatedToken repeated repeatedTimes times, joined by a
+   * single space. At least one copy is always produced, even when repeatedTimes is below 1.
+   *
+   * @param repeatedToken the token to repeat
+   * @param repeatedTimes how many copies to emit
+   * @return the space-separated copies, without a trailing space
+   */
+  private String concatTokens(String repeatedToken, int repeatedTimes) {
+    final StringBuilder result = new StringBuilder();
+    result.append(repeatedToken);
+    for (int i = 1; i < repeatedTimes; i++) {
+      result.append(' ').append(repeatedToken);
+    }
+    return result.toString();
+  }
+
+  /**
+   * The too long input (2 * maxlen tokens) should be cut down so that length() reports 202.
+   * TODO is this a bug? maxlen is defined as 200, not 202. NOTE(review): the extra two may be
+   * sentence-boundary markers counted by Sentence.length() - confirm.
+   */
+  @Test
+  public void testTooManyTokensSourceTruncated() {
+    assertEquals(new Sentence(this.tooLongInput, 0, joshuaConfiguration).length(), 202);
+  }
+
+  @Test
+  public void testTooManyTokensSourceOnlyNotNull() {
+    assertNotNull(new Sentence(this.tooLongInput, 0, joshuaConfiguration));
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetIsEmpty() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.target, "");
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetTruncated() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.length(), 202);
+  }
+
+  @Test
+  public void testClearlyNotTooManyTokens() {
+    // A single short token must not be reported as an empty sentence.
+    String input = "token";
+    assertFalse(new Sentence(input, 0, joshuaConfiguration).isEmpty());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
new file mode 100644
index 0000000..1ad020c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Arc class: one arc is built between two nodes and each of its
+ * properties is read back through the corresponding getter.
+ *
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice_arc" })
+public class ArcTest {
+
+  /** Constructor arguments shared by all tests: two end nodes, a cost and a label. */
+  private final Node<String> headNode = new Node<>(1);
+  private final Node<String> tailNode = new Node<>(2);
+  private final float piCost = (float) Math.PI;
+  private final String piLabel = "pi";
+
+  /** Arc under test; assigned in constructArc(), which every getter test depends on. */
+  private Arc<String> arc;
+
+  /**
+   * Builds the arc and checks all four properties right away. Declared to run
+   * only after NodeTest.constructNode.
+   */
+  @Test(dependsOnMethods = { "org.apache.joshua.lattice.NodeTest.constructNode" })
+  //@Test(dependsOnGroups = {"lattice_node" })
+  public void constructArc() {
+    // Arguments are passed in the order tail, head, cost, label.
+    arc = new Arc<>(tailNode, headNode, piCost, piLabel);
+
+    Assert.assertEquals(arc.getHead(), headNode);
+    Assert.assertEquals(arc.getTail(), tailNode);
+    Assert.assertEquals(arc.getCost(), piCost);
+    Assert.assertEquals(arc.getLabel(), piLabel);
+  }
+
+  /** getHead() must return the node that was passed as head. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getHead() {
+    Assert.assertEquals(arc.getHead(), headNode);
+  }
+
+  /** getTail() must return the node that was passed as tail. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getTail() {
+    Assert.assertEquals(arc.getTail(), tailNode);
+  }
+
+  /** getCost() must return the cost that was passed in. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getCost() {
+    Assert.assertEquals(arc.getCost(), piCost);
+  }
+
+  /** getLabel() must return the label that was passed in. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getLabel() {
+    Assert.assertEquals(arc.getLabel(), piLabel);
+  }
+
+}