You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:28 UTC

[17/60] [partial] incubator-joshua git commit: maven multi-module layout 1st commit: moving files into joshua-core

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
new file mode 100644
index 0000000..7e4c2cc
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.zmert;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.FileUtility;
+
+public class ZMERT {
+  public static void main(String[] args) throws Exception {
+    boolean external = false; // should each MERT iteration be launched externally?
+
+    if (args.length == 1) {
+      if (args[0].equals("-h")) {
+        printZMERTUsage(args.length, true);
+        System.exit(2);
+      } else {
+        external = false;
+      }
+    } else if (args.length == 3) {
+      external = true;
+    } else {
+      printZMERTUsage(args.length, false);
+      System.exit(1);
+    }
+
+    if (!external) {
+      JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+      MertCore myMert = new MertCore(args[0],joshuaConfiguration);
+      myMert.run_MERT(); // optimize lambda[]!!!
+      myMert.finish();
+    } else {
+      int maxMem = Integer.parseInt(args[1]);
+      String configFileName = args[2];
+      String stateFileName = FileUtility.dirname(configFileName) + "/ZMERT.temp.state";
+      String cp = System.getProperty("java.class.path");
+      boolean done = false;
+      int iteration = 0;
+      while (!done) {
+        ++iteration;
+        Runtime rt = Runtime.getRuntime();
+        Process p =
+            rt.exec("java -Xmx" + maxMem + "m -cp " + cp + " org.apache.joshua.zmert.MertCore "
+                + configFileName + " " + stateFileName + " " + iteration);
+        BufferedReader br_i = new BufferedReader(new InputStreamReader(p.getInputStream()));
+        BufferedReader br_e = new BufferedReader(new InputStreamReader(p.getErrorStream()));
+        String dummy_line = null;
+        while ((dummy_line = br_i.readLine()) != null) {
+          System.out.println(dummy_line);
+        }
+        while ((dummy_line = br_e.readLine()) != null) {
+          System.out.println(dummy_line);
+        }
+        int status = p.waitFor();
+
+        if (status == 90) {
+          done = true;
+        } else if (status == 91) {
+          done = false;
+        } else {
+          System.out.println("Z-MERT exiting prematurely (MertCore returned " + status + ")...");
+          System.exit(status);
+        }
+      }
+    }
+
+    System.exit(0);
+
+  } // main(String[] args)
+
+  public static void printZMERTUsage(int argsLen, boolean detailed) {
+    if (!detailed) {
+      println("Oops, you provided " + argsLen + " args!");
+      println("");
+      println("Usage:");
+      println("           ZMERT -maxMem maxMemoryInMB MERT_configFile");
+      println("");
+      println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
+      println("allowed to use when performing its calculations (no memroy is needed while");
+      println("the decoder is running),");
+      println("and the config file contains any subset of Z-MERT's 20-some parameters,");
+      println("one per line.  Run   ZMERT -h   for more details on those parameters.");
+    } else {
+      println("Usage:");
+      println("           ZMERT -maxMem maxMemoryInMB MERT_configFile");
+      println("");
+      println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
+      println("allowed to use when performing its calculations (no memroy is needed while");
+      println("the decoder is running),");
+      println("and the config file contains any subset of Z-MERT's 20-some parameters,");
+      println("one per line.  Those parameters, and their default values, are:");
+      println("");
+      println("Relevant files:");
+      println("  -dir dirPrefix: working directory\n    [[default: null string (i.e. they are in the current directory)]]");
+      println("  -s sourceFile: source sentences (foreign sentences) of the MERT dataset\n    [[default: null string (i.e. file name is not needed by MERT)]]");
+      println("  -r refFile: target sentences (reference translations) of the MERT dataset\n    [[default: reference.txt]]");
+      println("  -rps refsPerSen: number of reference translations per sentence\n    [[default: 1]]");
+      println("  -txtNrm textNormMethod: how should text be normalized?\n       (0) don't normalize text,\n    or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n    or (2) apply 1 and also rejoin dashes between letters,\n    or (3) apply 1 and also drop non-ASCII characters,\n    or (4) apply 1+2+3\n    [[default: 1]]");
+      println("  -p paramsFile: file containing parameter names, initial values, and ranges\n    [[default: params.txt]]");
+      println("  -docInfo documentInfoFile: file informing Z-MERT which document each\n    sentence belongs to\n    [[default: null string (i.e. all sentences are in one 'document')]]");
+      println("  -fin finalLambda: file name for final lambda[] values\n    [[default: null string (i.e. no such file will be created)]]");
+      println("");
+      println("MERT specs:");
+      println("  -m metricName metric options: name of evaluation metric and its options\n    [[default: BLEU 4 closest]]");
+      println("  -maxIt maxMERTIts: maximum number of MERT iterations\n    [[default: 20]]");
+      println("  -prevIt prevMERTIts: maximum number of previous MERT iterations to\n    construct candidate sets from\n    [[default: 20]]");
+      println("  -minIt minMERTIts: number of iterations before considering an early exit\n    [[default: 5]]");
+      println("  -stopIt stopMinIts: some early stopping criterion must be satisfied in\n    stopMinIts *consecutive* iterations before an early exit\n    [[default: 3]]");
+      println("  -stopSig sigValue: early MERT exit if no weight changes by more than sigValue\n    [[default: -1 (i.e. this criterion is never investigated)]]");
+      println("  -thrCnt threadCount: number of threads to run in parallel when optimizing\n    [[default: 1]]");
+      println("  -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n    or both (3) or neither (0)\n    [[default: 3]]");
+      println("  -compress compressFiles: should Z-MERT compress the files it produces (1)\n    or not (0)\n    [[default: 0]]");
+      println("  -ipi initsPerIt: number of intermediate initial points per iteration\n    [[default: 20]]");
+      println("  -opi oncePerIt: modify a parameter only once per iteration (1) or not (0)\n    [[default: 0]]");
+      println("  -rand randInit: choose initial point randomly (1) or from paramsFile (0)\n    [[default: 0]]");
+      println("  -seed seed: seed used to initialize random number generator\n    [[default: time (i.e. value returned by System.currentTimeMillis()]]");
+      // println("  -ud useDisk: reliance on disk (0-2; higher value => more reliance)\n    [[default: 2]]");
+      println("");
+      println("Decoder specs:");
+      println("  -cmd commandFile: name of file containing commands to run the decoder\n    [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
+      println("  -passIt passIterationToDecoder: should iteration number be passed\n    to command file (1) or not (0)\n    [[default: 0]]");
+      println("  -decOut decoderOutFile: name of the output file produced by the decoder\n    [[default: output.nbest]]");
+      println("  -decExit validExit: value returned by decoder to indicate success\n    [[default: 0]]");
+      println("  -dcfg decConfigFile: name of decoder config file\n    [[default: dec_cfg.txt]]");
+      println("  -N N: size of N-best list (per sentence) generated in each MERT iteration\n    [[default: 100]]");
+      println("");
+      println("Output specs:");
+      println("  -v verbosity: Z-MERT verbosity level (0-2; higher value => more verbose)\n    [[default: 1]]");
+      println("  -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n    [[default: 0]]");
+      println("");
+    }
+  }
+
+  private static void println(Object obj) {
+    System.out.println(obj);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
new file mode 100644
index 0000000..571b524
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides code for performing minimum error rate training.
+ * Much of the code in this package is based on Och (2003). 
+ * A deeper description of the algorithm is in Zaidan (2009).
+ */
+package org.apache.joshua.zmert;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/resources/log4j.properties b/joshua-core/src/main/resources/log4j.properties
new file mode 100644
index 0000000..acca5e9
--- /dev/null
+++ b/joshua-core/src/main/resources/log4j.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# log4j settings
+log4j.rootLogger=WARN, stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.err
+log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/overview.html
----------------------------------------------------------------------
diff --git a/joshua-core/src/overview.html b/joshua-core/src/overview.html
new file mode 100644
index 0000000..7efe5b3
--- /dev/null
+++ b/joshua-core/src/overview.html
@@ -0,0 +1,41 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head></head>
+<body bgcolor="white">
+
+<!--
+##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
+##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
+##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
+-->
+
+Apache Joshua is an extensible, open source statistical 
+hierarchical phrase-based machine translation system.
+
+<!--
+<h2>Related Documentation</h2>
+-->
+
+<!-- Put @see and @since tags down here. -->
+
+@see <a href="http://joshua.incubator.apache.org/">Joshua Website</a>
+
+
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
new file mode 100644
index 0000000..19cb20c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import java.util.logging.Logger;
+
/** Placeholder test class; the original corpus-array tests are commented out below. */
public class CorpusArrayTest {

  /** Logger for this class. */
  private static Logger logger = Logger.getLogger(CorpusArrayTest.class.getName());
}
+
+//  @Test
+//  public void writePartsToDisk() {
+//
+//    String filename = "data/tiny.en";
+//    int numSentences = 5;  // Should be 5 sentences in tiny.en
+//    int numWords = 89;     // Should be 89 words in tiny.en
+//
+//
+//    try {
+//
+//      // FIX: can't use createVocabulary(String) because we set numWords and numSentences
+//      Vocabulary vocab = new Vocabulary();
+//      SuffixArrayFactory.createVocabulary(filename, vocab);
+//      Corpus corpus = SuffixArrayFactory.createCorpusArray(filename, vocab, numWords, numSentences);
+//
+//      corpus.writeWordIDsToFile(filename+".bin");
+//      corpus.writeSentenceLengthsToFile(filename+".sbin");
+//
+//      MemoryMappedCorpusArray mmCorpus = new MemoryMappedCorpusArray(corpus.getVocabulary(), filename+".bin", numWords*4, filename+".sbin", numSentences*4);
+//
+//      // For each word in the corpus,
+//      for (int i=0; i<corpus.size(); i++) {
+//
+//        // Verify that the memory-mapped corpus and the in-memory corpus have the same value
+//        Assert.assertEquals(mmCorpus.getWordID(i), corpus.getWordID(i));
+//      }
+//
+//
+//      // For each sentence in the corpus
+//      for (int i=0; i<corpus.sentences.length; i++) {
+//
+//        // Verify that the sentence position in the memory-mapped corpus and the in-memory corpus have the same value
+//        Assert.assertEquals(corpus.getSentencePosition(i), mmCorpus.getSentencePosition(i));
+//      }
+//
+//    } catch (IOException e) {
+//      Assert.fail(e.getLocalizedMessage());
+//    }
+//
+//  }
+//
+//  @Test
+//  public void iterate() {
+//
+//    String[] sentences = {
+//        "scientists complete sequencing of the chromosome linked to early dementia",
+//        "( afp , paris , january 2 ) an international team of scientists said that they have completed the sequencing of human chromosome 14 that is linked to many diseases , including the early-onset alzheimer's that may strike people in their 30s .",
+//        "this is the fourth chromosome whose sequence has been completed to date . it comprises more than 87 million pairs of dna .",
+//        "this study published in the weekly british scientific journal nature illustrates that the sequence of chromosome 14 comprises 1,050 genes and gene fragments .",
+//        "the goal of geneticists is to provide diagnostic tools to identify defective genes that cause diseases so as to arrive eventually at treatments that can prevent those genes from malfunctioning ."
+//    };
+//
+//
+//
+//    // Tell System.out and System.err to use UTF8
+//    FormatUtil.useUTF8();
+//
+//    try {
+//
+//      File sourceFile = File.createTempFile("source", new Date().toString());
+//      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+//      for (String sentence : sentences) {
+//        sourcePrintStream.println(sentence);
+//      }
+//      sourcePrintStream.close();
+//      String corpusFileName = sourceFile.getAbsolutePath();
+//
+//      Vocabulary vocabulary;
+//
+//      logger.fine("Constructing vocabulary from file " + corpusFileName);
+//      vocabulary = new Vocabulary();
+//      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, vocabulary, true);
+//
+//      logger.fine("Constructing corpus array from file " + corpusFileName);
+//      Corpus corpus = SuffixArrayFactory.createCorpusArray(corpusFileName, vocabulary, lengths[0], lengths[1]);
+//
+//      int expectedIndex = 0;
+//      for (int actualIndex : corpus.corpusPositions()) {
+//        Assert.assertEquals(actualIndex, expectedIndex);
+//        expectedIndex += 1;
+//      }
+//
+//      Assert.assertEquals(corpus.size(), expectedIndex);
+//
+//
+//    } catch (IOException e) {
+//      Assert.fail("Unable to write temporary file. " + e.toString());
+//    }
+//
+//
+//
+//  }
+//
+//
+//  @Test
+//  public void writeAllToDisk() throws ClassNotFoundException {
+//
+//    String filename = "data/tiny.en";
+//    int numSentences = 5;  // Should be 5 sentences in tiny.en
+//    int numWords = 89;     // Should be 89 words in tiny.en
+//
+//
+//    try {
+//
+//      // FIX: can't use createVocabulary(String) because we set numWords and numSentences
+//      Vocabulary vocab = new Vocabulary();
+//      Vocabulary.initializeVocabulary(filename, vocab, true);
+//      CorpusArray corpus = SuffixArrayFactory.createCorpusArray(filename, vocab, numWords, numSentences);
+//
+//      corpus.write(filename+".corpus", filename+".vocab", "UTF-8");
+//
+//      MemoryMappedCorpusArray mmCorpus = new MemoryMappedCorpusArray(filename+".corpus", filename+".vocab");
+//
+//      Assert.assertEquals(mmCorpus.size(), corpus.size());
+//      Assert.assertEquals(mmCorpus.getNumSentences(), corpus.getNumSentences());
+//
+//      // For each word in the corpus,
+//      for (int i=0; i<corpus.size(); i++) {
+//
+//        // Verify that the memory-mapped corpus and the in-memory corpus have the same value
+//        Assert.assertEquals(mmCorpus.getWordID(i), corpus.getWordID(i));
+//      }
+//
+//
+//      // For each sentence in the corpus
+//      for (int i=0; i<corpus.sentences.length; i++) {
+//
+//        // Verify that the sentence start position in the memory-mapped corpus and the in-memory corpus have the same value
+//        Assert.assertEquals(mmCorpus.getSentencePosition(i), corpus.getSentencePosition(i));
+//
+//        // Verify that the sentence end position in the memory-mapped corpus and the in-memory corpus have the same value
+//        Assert.assertEquals(mmCorpus.getSentenceEndPosition(i), corpus.getSentenceEndPosition(i));
+//
+//        // Verify that the phrase corresponding to this sentence is the same
+//        Phrase sentence = corpus.getSentence(i);
+//        Phrase mmSentence = mmCorpus.getSentence(i);
+//        Assert.assertNotNull(sentence);
+//        Assert.assertNotNull(mmSentence);
+//        Assert.assertEquals(mmSentence, sentence);
+//      }
+//
+//    } catch (IOException e) {
+//      Assert.fail(e.getLocalizedMessage());
+//    }
+//
+//  }
+//
+//}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
new file mode 100644
index 0000000..3558b79
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.apache.joshua.corpus.Span;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ *
+ * 
+ * @author Lane Schwartz
+ */
+public class SpanTest {
+
+  @Test
+  public void iterator() {
+
+    Span span = new Span(1,10);
+
+    int expected = 1;
+
+    for (int actual : span) {
+      Assert.assertEquals(actual, expected);
+      expected++;
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
new file mode 100644
index 0000000..834d68b
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import static org.apache.joshua.util.FormatUtils.isNonterminal;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
+
/**
 * Unit tests for {@link Vocabulary}.
 *
 * <p>Vocabulary keeps its word/id mappings in static (JVM-global) state, so the
 * vocabulary is cleared before and after every test; assertions within each
 * test depend on the exact order of the preceding {@code Vocabulary.id} calls.
 */
public class VocabularyTest {
  private static final String WORD1 = "word1";
  private static final String WORD2 = "word2";
  private static final String NON_TERMINAL = "[X]";
  private static final String GOAL = "[GOAL]";

  /** Resets the global vocabulary so earlier tests cannot leak mappings in. */
  @Before
  public void init() {
    Vocabulary.clear();
  }

  /** Resets the global vocabulary so this class cannot leak mappings out. */
  @After
  public void deinit() {
    Vocabulary.clear();
  }

  @Test
  public void givenVocabulary_whenEmpty_thenOnlyContainsUnknownWord() {
    // A freshly cleared vocabulary contains exactly one entry: the unknown word.
    assertTrue(Vocabulary.hasId(Vocabulary.UNKNOWN_ID));
    assertFalse(Vocabulary.hasId(1));
    assertFalse(Vocabulary.hasId(-1));
    assertEquals(Vocabulary.UNKNOWN_WORD, Vocabulary.word(Vocabulary.UNKNOWN_ID));
    assertEquals(1, Vocabulary.size());
  }

  @Test
  public void givenVocabulary_whenNewWord_thenMappingIsAdded() {
    // NOTE(review): assumes ids are assigned sequentially starting at 1 — the
    // first word interned after clear() gets id 1. Confirm against Vocabulary.id.
    final int FIRST_WORD_ID = 1;
    assertFalse(Vocabulary.hasId(FIRST_WORD_ID));
    assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
    //should return same id after second call:
    assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
    assertTrue(Vocabulary.hasId(FIRST_WORD_ID));
    assertEquals(WORD1, Vocabulary.word(FIRST_WORD_ID));
    assertEquals(2, Vocabulary.size());
  }

  @Test
  public void givenVocabulary_whenCheckingStringInBracketsOrNegativeNumber_thenIsNonTerminal() {
    // Exercises the static FormatUtils.isNonterminal overloads for both
    // String symbols and integer ids.
    //non-terminals
    assertTrue(isNonterminal(NON_TERMINAL));
    //terminals
    assertFalse(isNonterminal(WORD1));
    assertFalse(isNonterminal("[]"));
    assertFalse(isNonterminal("["));
    assertFalse(isNonterminal("]"));
    assertFalse(isNonterminal(""));

    //negative numbers indicate non-terminals
    assertTrue(isNonterminal(-1));
    assertTrue(isNonterminal(-5));

    //positive numbers indicate terminals:
    assertFalse(isNonterminal(0));
    assertFalse(isNonterminal(5));
  }

  @Test
  public void givenVocabulary_whenNonTerminal_thenReturnsStrictlyPositiveNonTerminalIndices() {
    // NOTE(review): this test assumes non-terminals get negative public ids
    // backed by a positive storage slot, and that interning NON_TERMINAL, "",
    // WORD1, GOAL in this order puts GOAL at slot 4 — confirm against Vocabulary.
    final int FIRST_NON_TERMINAL_INDEX = 1;
    assertTrue(Vocabulary.id(NON_TERMINAL) < 0);
    assertTrue(Vocabulary.hasId(FIRST_NON_TERMINAL_INDEX));
    assertTrue(Vocabulary.hasId(-FIRST_NON_TERMINAL_INDEX));

    assertTrue(Vocabulary.id("") > 0);
    assertTrue(Vocabulary.id(WORD1) > 0);

    final int SECOND_NON_TERMINAL_INDEX = 4;
    assertTrue(Vocabulary.id(GOAL) < 0);
    assertTrue(Vocabulary.hasId(SECOND_NON_TERMINAL_INDEX));
    assertTrue(Vocabulary.hasId(-SECOND_NON_TERMINAL_INDEX));

    assertTrue(Vocabulary.id(WORD2) > 0);
  }

  // JUnit rule: provides a per-test temporary directory that is deleted afterwards.
  @Rule
  public TemporaryFolder folder = new TemporaryFolder();

  @Test
  public void givenVocabulary_whenWritenAndReading_thenVocabularyStaysTheSame() throws IOException {
    // Round-trip: write the vocabulary to disk, clear the global state, read it
    // back, and verify that all word/id mappings survived unchanged.
    File vocabFile = folder.newFile();

    int id1 = Vocabulary.id(WORD1);
    int id2 = Vocabulary.id(NON_TERMINAL);
    int id3 = Vocabulary.id(WORD2);

    Vocabulary.write(vocabFile.getAbsolutePath());

    Vocabulary.clear();

    Vocabulary.read(vocabFile);

    assertEquals(4, Vocabulary.size()); //unknown word + 3 other words
    assertTrue(Vocabulary.hasId(id1));
    assertTrue(Vocabulary.hasId(id2));
    assertTrue(Vocabulary.hasId(id3));
    assertEquals(id1, Vocabulary.id(WORD1));
    assertEquals(id2, Vocabulary.id(NON_TERMINAL));
    assertEquals(id3, Vocabulary.id(WORD2));
  }
}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
new file mode 100644
index 0000000..c1af5ab
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus.vocab;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+import org.apache.joshua.corpus.Vocabulary;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ *
+ * 
+ * @author Lane Schwartz
+ */
+public class VocabularyTest {
+
+  /** [X], [X,1], [X,2], [S], [S,1] <unk>, <s>, </s>, -pau-*/
+  int numBuiltInSymbols = 9;
+
+  /** <unk>, <s>, </s>, -pau- */
+  int numBuiltInTerminals = 4;
+
+  @Test
+  public void basicVocabTest() {
+
+    Vocabulary vocab1 = new Vocabulary();
+    Vocabulary vocab2 = new Vocabulary();
+
+    Assert.assertEquals(vocab1, vocab2);
+
+    Assert.assertFalse(vocab1.size() == 0);
+    //Assert.assertTrue(vocab1.intToString.get(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    //Assert.assertFalse(vocab1.getWords().isEmpty());
+    //    Assert.assertTrue(vocab1.getWords(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    //    Assert.assertEquals(vocab1.getWords(), vocab1.intToString.values());
+
+    Assert.assertNotEquals(vocab1.size(), numBuiltInSymbols);
+    //    Assert.assertEquals(vocab1.getWord(Vocabulary.UNKNOWN_WORD), Vocabulary.UNKNOWN_WORD_STRING);
+
+    //Assert.assertEquals(vocab1.getID("sample"), Vocabulary.UNKNOWN_WORD);
+    //Assert.assertEquals(vocab1.getID(null), Vocabulary.UNKNOWN_WORD);
+
+    //    Assert.assertFalse(vocab1.terminalToInt.isEmpty());
+    //    Assert.assertEquals(vocab1.terminalToInt.size(), this.numBuiltInTerminals);
+    //    Assert.assertFalse(vocab1.isFixed);
+    //
+    //    vocab1.fixVocabulary();
+    //    Assert.assertTrue(vocab1.isFixed);
+
+    //    Assert.assertEquals(vocab1.getID(Vocabulary.X_STRING), -1);
+    //    Assert.assertEquals(vocab1.getID(Vocabulary.X1_STRING), -2);
+    //    Assert.assertEquals(vocab1.getID(Vocabulary.X2_STRING), -3);
+    //
+    //    Assert.assertEquals(vocab1.getWord(-1), Vocabulary.X_STRING);
+    //    Assert.assertEquals(vocab1.getWord(-2), Vocabulary.X1_STRING);
+    //    Assert.assertEquals(vocab1.getWord(-3), Vocabulary.X2_STRING);
+
+
+
+    //    Assert.assertFalse(vocab2.intToString.isEmpty());
+    //		Assert.assertTrue(vocab2.intToString.get(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    //    Assert.assertFalse(vocab2.getWords().isEmpty());
+    //		Assert.assertTrue(vocab2.getWord(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    //    Assert.assertEquals(vocab2.getWords(), vocab2.intToString.values());
+
+    Assert.assertNotEquals(vocab2.size(), numBuiltInSymbols);
+    //    Assert.assertEquals(vocab2.getWord(Vocabulary.UNKNOWN_WORD), Vocabulary.UNKNOWN_WORD_STRING);
+
+    //		Assert.assertEquals(vocab2.getID("sample"), Vocabulary.UNKNOWN_WORD);
+    //		Assert.assertEquals(vocab2.getID(null), Vocabulary.UNKNOWN_WORD);
+
+    //    Assert.assertFalse(vocab2.terminalToInt.isEmpty());
+    //    Assert.assertEquals(vocab2.terminalToInt.size(), this.numBuiltInTerminals);
+    //		Assert.assertTrue(vocab2.isFixed);
+  }
+
+  @Test
+  public void verifyWordIDs() throws IOException {
+
+    // Adam Lopez's example...
+    String corpusString = "it makes him and it mars him , it sets him on and it takes him off .";
+    //		String queryString = "it persuades him and it disheartens him";
+
+    String sourceFileName;
+    {
+      File sourceFile = File.createTempFile("source", new Date().toString());
+      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+      sourcePrintStream.println(corpusString);
+      sourcePrintStream.close();
+      sourceFileName = sourceFile.getAbsolutePath();
+    }
+
+    Vocabulary vocab = new Vocabulary();
+    //    Vocabulary.initializeVocabulary(sourceFileName, vocab, true);
+
+//    Assert.assertEquals(vocab.getWords(Vocabulary.id("it")), "it");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("makes")), "makes");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("him")), "him");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("and")), "and");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("mars")), "mars");
+//    Assert.assertEquals(vocab.getWord(vocab.getID(",")), ",");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("sets")), "sets");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("on")), "on");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("takes")), "takes");
+//    Assert.assertEquals(vocab.getWord(vocab.getID("off")), "off");
+
+    //		Assert.assertEquals(vocab.getWord(vocab.getID("persuades")), Vocabulary.UNKNOWN_WORD_STRING);
+    //		Assert.assertEquals(vocab.getWord(vocab.getID("disheartens")), Vocabulary.UNKNOWN_WORD_STRING);
+  }
+
+  @SuppressWarnings("static-access")
+  @Test(enabled=false)
+  public void loadVocabFromFile() {
+
+    String filename = "data/tiny.en";
+    int numSentences = 5;  // Should be 5 sentences in tiny.en
+    int numWords = 89;     // Should be 89 words in tiny.en
+    int numUniqWords = 60; // Should be 60 unique words in tiny.en
+
+    Vocabulary vocab = new Vocabulary();
+    Vocabulary vocab2 = new Vocabulary();
+
+    Assert.assertTrue(vocab.equals(vocab2));
+    Assert.assertTrue(vocab2.equals(vocab));
+    Assert.assertEquals(vocab, vocab2);
+
+    try {
+      vocab.read(new File(getClass().getClassLoader().getResource(filename).getFile()));
+      //int[] result = Vocabulary.initializeVocabulary(filename, vocab, true);
+      Assert.assertNotNull(vocab);
+      Assert.assertEquals(vocab.size(), 2);
+      //Assert.assertEquals(vocab.getWords(numWords), numWords); 
+      // Assert.assertEquals(result[1], numSentences);  
+
+      //Assert.assertTrue(vocab.isFixed);
+      Assert.assertEquals(Vocabulary.size(), numUniqWords+numBuiltInSymbols);
+
+    } catch (IOException e) {
+      Assert.fail("Error processing " + filename +"; Reason: " + e);
+    }
+
+    Assert.assertFalse(vocab.equals(vocab2));
+
+    try {
+      vocab2.read(new File(filename));
+      //int[] result = Vocabulary.initializeVocabulary(filename, vocab2, true);
+      Assert.assertNotNull(vocab2);
+      Assert.assertEquals(vocab2.size(), 2);
+      //      Assert.assertEquals(result[0], numWords); 
+      //      Assert.assertEquals(result[1], numSentences);  
+
+      //			Assert.assertTrue(vocab2.isFixed);
+      Assert.assertEquals(Vocabulary.size(), numUniqWords+numBuiltInSymbols);
+
+    } catch (IOException e) {
+      Assert.fail("Could not load file " + filename);
+    }
+
+    Assert.assertEquals(vocab, vocab2);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
new file mode 100644
index 0000000..5cc5996
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.joshua.util.FileUtility;
+
/**
 * Writes a small artificial translation grammar, glue grammar, and test
 * corpus to disk for decoder tests.
 */
public class ArtificialGrammarAndCorpusCreater {

  private static final String JOSHUA_RULE_SEPARATOR = " ||| ";
  private static final String ARTIFICAL_TERMINAL_RULE1 = "[T1]" + JOSHUA_RULE_SEPARATOR + "garcon"
      + JOSHUA_RULE_SEPARATOR + "boy" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_TERMINAL_RULE2 = "[T2]" + JOSHUA_RULE_SEPARATOR + "fille"
      + JOSHUA_RULE_SEPARATOR + "girl" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_TERMINAL_RULE3 = "[T3]" + JOSHUA_RULE_SEPARATOR + "garcon"
      + JOSHUA_RULE_SEPARATOR + "mister" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_TERMINAL_RULE4 = "[T4]" + JOSHUA_RULE_SEPARATOR + "fille"
      + JOSHUA_RULE_SEPARATOR + "woman" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_TERMINAL_RULE5 = "[T5]" + JOSHUA_RULE_SEPARATOR + "fille"
      + JOSHUA_RULE_SEPARATOR + "lady" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_NONTERTERMINAL_RULE1 = "[NT1]" + JOSHUA_RULE_SEPARATOR
      + "le [T1,1] aime la [T2,2]" + JOSHUA_RULE_SEPARATOR + "the [T1,1] loves the [T2,2]"
      + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_NONTERTERMINAL_RULE_INVERTED = "[NT1]"
      + JOSHUA_RULE_SEPARATOR + "le [T1,1] aime la [T2,2]" + JOSHUA_RULE_SEPARATOR
      + "the [T2,2] loves the [T1,1]" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
  private static final String ARTIFICAL_TERMINAL_RULE6 = "[T6]" + JOSHUA_RULE_SEPARATOR + "garcon"
      + JOSHUA_RULE_SEPARATOR + "sir" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";

  private static final String GLUE_RULE_BEGIN = "[GOAL] ||| <s> ||| <s> ||| 0";
  private static final String GLUE_RULE_NT = "[GOAL] ||| [GOAL,1] [NT1,2] ||| [GOAL,1] [NT1,2] ||| -1";
  private static final String GLUE_RULE_END = "[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0";

  private static final String TEST_SENTENCE1 = "le garcon aime la fille";

  /** Monotone rule set: all terminal rules plus the in-order nonterminal rule. */
  private static List<String> getArtificalGrammarsList1() {
    return Arrays.asList(ARTIFICAL_TERMINAL_RULE1, ARTIFICAL_TERMINAL_RULE2,
        ARTIFICAL_TERMINAL_RULE3, ARTIFICAL_TERMINAL_RULE4, ARTIFICAL_TERMINAL_RULE5,
        ARTIFICAL_TERMINAL_RULE6, ARTIFICAL_NONTERTERMINAL_RULE1);
  }

  /** Rule set 1 extended with the inverted (reordering) nonterminal rule. */
  private static List<String> getArtificalGrammarsList2() {
    List<String> result = new ArrayList<String>(getArtificalGrammarsList1());
    result.add(ARTIFICAL_NONTERTERMINAL_RULE_INVERTED);
    return result;
  }

  private static final List<String> ARTIFICIAL_GLUE_GRAMMAR_RULES_LIST = Arrays.asList(
      GLUE_RULE_BEGIN, GLUE_RULE_NT, GLUE_RULE_END);

  private final String mainGrammarFilePath;
  private final String glueGrammarFilePath;
  private final String testSentencesFilePath;

  private ArtificialGrammarAndCorpusCreater(String mainGrammarFilePath, String glueGrammarFilePath,
      String testSentencesFilePath) {
    this.mainGrammarFilePath = mainGrammarFilePath;
    this.glueGrammarFilePath = glueGrammarFilePath;
    this.testSentencesFilePath = testSentencesFilePath;
  }

  public static ArtificialGrammarAndCorpusCreater createArtificialGrammarAndCorpusCreater(
      String mainGrammarFilePath, String glueGrammarFilePath, String testSentencesFilePath) {
    return new ArtificialGrammarAndCorpusCreater(mainGrammarFilePath, glueGrammarFilePath,
        testSentencesFilePath);
  }

  /**
   * Writes the lines to filePath joined by '\n' with no trailing newline.
   * try-with-resources replaces the manual FileUtility close, and an I/O
   * failure now surfaces as a RuntimeException instead of being swallowed
   * with a printed stack trace (consistent with TestConfigFileCreater).
   */
  private static void writeFile(String filePath, List<String> lines) {
    try (BufferedWriter outputWriter = new BufferedWriter(new FileWriter(filePath))) {
      outputWriter.write(String.join("\n", lines));
    } catch (IOException e) {
      throw new RuntimeException("Failed to write file: " + filePath, e);
    }
  }

  /** Writes the main grammar, optionally including the inverting NT rule. */
  protected final void writeMainGrammar(boolean includeInvertingNonterminalRule) {
    List<String> ruleList = includeInvertingNonterminalRule
        ? getArtificalGrammarsList2()
        : getArtificalGrammarsList1();
    writeFile(mainGrammarFilePath, ruleList);
  }

  /** Writes the three glue rules ([GOAL] begin/NT/end) to the glue grammar file. */
  protected final void writeGlueGrammar() {
    writeFile(glueGrammarFilePath, ARTIFICIAL_GLUE_GRAMMAR_RULES_LIST);
  }

  /** Writes the single French test sentence to the test-sentences file. */
  protected final void writeTestSentencesFile1() {
    writeFile(testSentencesFilePath, Arrays.asList(TEST_SENTENCE1));
  }

}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
new file mode 100644
index 0000000..326ab23
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for decoder thread.
+ * 
+ * @author Lane Schwartz
+ * @version $LastChangedDate$
+ */
+public class DecoderThreadTest {
+
+  @Test
+  public void setup() {
+
+    String[] sourceSentences = {
+        "a b c d",
+        "a b c d",
+        "a b c d"
+    };
+
+    String[] targetSentences = {
+        "w x y z",
+        "w t u v",
+        "s x y z"
+    };
+
+    String[] alignmentLines = {
+        "0-0 1-1 2-2 3-3",
+        "0-0 1-1 2-2 3-3",
+        "0-0 1-1 2-2 3-3"
+    };
+
+    String[] testSentences = {
+        "a b c"	
+    };
+
+    try {
+
+      // Set up source corpus
+      File sourceFile = File.createTempFile("source", new Date().toString());
+      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+      for (String sentence : sourceSentences) {
+        sourcePrintStream.println(sentence);
+      }
+      sourcePrintStream.close();
+      String sourceCorpusFileName = sourceFile.getAbsolutePath();
+
+//      Vocabulary vocabulary = new Vocabulary();
+//      int[] sourceLengths = Vocabulary.initializeVocabulary(sourceCorpusFileName, vocabulary, true);
+//      Assert.assertEquals(sourceLengths.length, 2);
+//      int numberOfSentences = sourceLengths[1];
+//
+//      Corpus sourceCorpus = SuffixArrayFactory.createCorpusArray(sourceCorpusFileName, vocabulary, sourceLengths[0], sourceLengths[1]);
+
+
+      // Set up target corpus
+      File targetFile = File.createTempFile("target", new Date().toString());
+      PrintStream targetPrintStream = new PrintStream(targetFile, "UTF-8");
+      for (String sentence : targetSentences) {
+        targetPrintStream.println(sentence);
+      }
+      targetPrintStream.close();
+      String targetCorpusFileName = targetFile.getAbsolutePath();
+
+//      int[] targetLengths = Vocabulary.initializeVocabulary(targetCorpusFileName, vocabulary, true);
+//      Assert.assertEquals(targetLengths.length, sourceLengths.length);
+//      for (int i=0, n=targetLengths.length; i<n; i++) {
+//        Assert.assertEquals(targetLengths[i], sourceLengths[i]);
+//      }
+//
+//      Corpus targetCorpus = SuffixArrayFactory.createCorpusArray(targetCorpusFileName, vocabulary, targetLengths[0], targetLengths[1]);
+
+
+      // Construct alignments data structure
+      File alignmentsFile = File.createTempFile("alignments", new Date().toString());
+      PrintStream alignmentsPrintStream = new PrintStream(alignmentsFile, "UTF-8");
+      for (String sentence : alignmentLines) {
+        alignmentsPrintStream.println(sentence);
+      }
+      alignmentsPrintStream.close();
+      String alignmentFileName = alignmentsFile.getAbsolutePath();
+
+//      AlignmentGrids grids = new AlignmentGrids(
+//          new Scanner(alignmentsFile), 
+//          sourceCorpus, 
+//          targetCorpus, 
+//          numberOfSentences);
+
+
+      // Set up test corpus
+      File testFile = File.createTempFile("test", new Date().toString());
+      PrintStream testPrintStream = new PrintStream(testFile, "UTF-8");
+      for (String sentence : testSentences) {
+        testPrintStream.println(sentence);
+      }
+      testPrintStream.close();
+      String testFileName = testFile.getAbsolutePath();
+
+      // Filename of the extracted rules file.
+      String rulesFileName; {	
+        File rulesFile = File.createTempFile("rules", new Date().toString());
+        rulesFileName = rulesFile.getAbsolutePath();
+      }
+
+      String joshDirName; {
+        File joshDir = File.createTempFile(new Date().toString(), "josh");
+        joshDirName = joshDir.getAbsolutePath();
+        joshDir.delete();
+      }
+
+
+//      Compile compileJoshDir = new Compile();
+//      compileJoshDir.setSourceCorpus(sourceCorpusFileName);
+//      compileJoshDir.setTargetCorpus(targetCorpusFileName);
+//      compileJoshDir.setAlignments(alignmentFileName);
+//      compileJoshDir.setOutputDir(joshDirName);
+//      compileJoshDir.execute();
+//
+//      ExtractRules extractRules = new ExtractRules();
+//      extractRules.setJoshDir(joshDirName);
+//      extractRules.setTestFile(testFileName);
+//      extractRules.setOutputFile(rulesFileName);
+//      extractRules.execute();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to write temporary file. " + e.toString());
+    }
+//    } catch (ClassNotFoundException e) {
+//      Assert.fail("Unable to extract rules. " + e.toString());
+//    }
+  }
+
+  @Test
+  public void basicSuffixArrayGrammar() {
+
+    // Write configuration to temp file on disk
+    //		String configFile;
+
+
+    //		JoshuaDecoder decoder = 
+    //			JoshuaDecoder.getUninitalizedDecoder(configFile);
+
+
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
new file mode 100644
index 0000000..2a878f3
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Scanner;
+
+import org.testng.Assert;
+import org.testng.annotations.Parameters;
+import org.testng.annotations.Test;
+
+/**
+ * Performs regression tests to verify that the decoder produces expected output
+ * on known data sets.
+ * 
+ * @author Lane Schwartz
+ */
+public class JoshuaDecoderTest {
+
+  @Parameters({ "configFile", "sourceInput", "referenceOutput" })
+  @Test
+  public void regressionTest(String configFile, String sourceInput, String referenceOutput)
+      throws IOException {
+
+    File referenceFile = new File(referenceOutput);
+    File output = File.createTempFile("output", null);// ,
+                                                      // referenceFile.getParentFile());
+
+    String[] args = { configFile, sourceInput, output.getAbsoluteFile().toString() };
+    JoshuaDecoder.main(args);
+
+    Scanner resultScanner = new Scanner(output);
+    Scanner refScanner = new Scanner(referenceFile);
+
+    while (resultScanner.hasNextLine() && refScanner.hasNextLine()) {
+
+      String resultLine = resultScanner.nextLine();
+      String refLine = refScanner.nextLine();
+
+      String[] resultParts = resultLine.split(" \\|\\|\\| ");
+      String[] refParts = refLine.split(" \\|\\|\\| ");
+
+      Assert.assertEquals(resultParts.length, 4);
+      Assert.assertEquals(refParts.length, 4);
+
+      Assert.assertEquals(Integer.parseInt(resultParts[0]), Integer.parseInt(refParts[0]));
+      Assert.assertEquals(resultParts[1], refParts[1]);
+
+      String[] resultFeatures = resultParts[2].split(" ");
+      String[] refFeatures = refParts[2].split(" ");
+
+      Assert.assertEquals(resultFeatures.length, 5);
+      Assert.assertEquals(refFeatures.length, 5);
+
+      float acceptableDelta = 0.001f;
+      for (int i = 0; i < refFeatures.length; i++) {
+        Assert.assertEquals(Float.valueOf(resultFeatures[i]), Float.valueOf(refFeatures[i]),
+            acceptableDelta);
+      }
+    }
+    
+    resultScanner.close();
+    refScanner.close();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java b/joshua-core/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
new file mode 100644
index 0000000..5399bab
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.List;
+import org.apache.joshua.util.FileUtility;
+
+public class TestConfigFileCreater {
+
+
+  protected static String LANGUAGE_MODEL_FILE_NAME = "lm.gz";
+  private static final String NL = "\n";
+  private static final Double NEW_FEATURES_WEIGHT = 0.2;
+
+  private final String testTempFilesFolderName;
+  private final String mainGrammarFileName;
+  private final String glueGrammarFileName;
+  private final List<Double> phraseTableWeights;
+  private final boolean useSoftSyntacticDecoding;
+  private final boolean switchOfPruning;
+
+  private TestConfigFileCreater(String testTemFilesFolderName, String mainGrammarFileName,
+      String glueGrammarFileName, List<Double> phraseTableWeights,
+      boolean useSoftSyntacticDecoding, boolean switchOfPruning) {
+    this.testTempFilesFolderName = testTemFilesFolderName;
+    this.mainGrammarFileName = mainGrammarFileName;
+    this.glueGrammarFileName = glueGrammarFileName;
+    this.phraseTableWeights = phraseTableWeights;
+    this.useSoftSyntacticDecoding = useSoftSyntacticDecoding;
+    this.switchOfPruning = switchOfPruning;
+  }
+
+  public static TestConfigFileCreater createFeaturesTestConfigFileCreater(
+      String testTemFilesFolderName, String mainGrammarFileName, String glueGrammarFileName,
+
+      List<Double> phraseTableWeights, boolean useSoftSyntacticDecoding, boolean switchOfPruning) {
+    return new TestConfigFileCreater(testTemFilesFolderName, mainGrammarFileName,
+        glueGrammarFileName, phraseTableWeights, useSoftSyntacticDecoding, switchOfPruning);
+  }
+
+  private final String createGlueGrammarFileSpecificationLine() {
+    return "tm = thrax glue -1 " + "./" + testTempFilesFolderName + "/" + glueGrammarFileName;
+  }
+
+  private final String createMainGrammarFileSpecificationLine() {
+    return "tm = thrax pt 12 " + "./" + testTempFilesFolderName + "/" + mainGrammarFileName;
+  }
+
+  private static String getFeatureSwitchOnString(String featureFunctionName) {
+    return "feature-function = " + featureFunctionName;
+  }
+
+  public String getPruningSpecification() {
+    if (switchOfPruning) {
+      return "pop-limit = 0" + NL;
+    } else {
+      return "pop-limit = 100" + NL;
+    }
+  }
+
+  // Large String containing the mostly static, partly dynamic generated mose config
+  // file contents used for the test
+  private final String getJoshuaConfigFileFirstPart(boolean useSoftSyntacticDecoding) {
+    String result = "lm = kenlm 5 false false 100 " + createFullPath(LANGUAGE_MODEL_FILE_NAME) + NL
+        + createMainGrammarFileSpecificationLine() + NL + createGlueGrammarFileSpecificationLine()
+        + NL + "mark_oovs=false" + NL + "#tm config" + NL + "default_non_terminal = OOV" + NL
+        + "goalSymbol = GOAL" + NL + "#pruning config" + NL + getPruningSpecification()
+        + JoshuaConfiguration.SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME + " = "
+        + useSoftSyntacticDecoding + NL + "#nbest config" + NL + "use_unique_nbest = true" + NL
+
+        + "top_n = 100" // + NL +
+        // "feature-function = OOVPenalty"
+        + NL + "feature-function = WordPenalty";
+    return result;
+  }
+
+  private final String createPhraseTableSpecificationString() {
+    String result = "";
+    for (int i = 0; i < phraseTableWeights.size(); i++) {
+      double phraseTableWeight = phraseTableWeights.get(i);
+      result += "tm_pt_" + i + " " + phraseTableWeight + NL;
+    }
+    return result;
+  }
+
+  private final String getMosesConfigFilePart2() {
+    String retsult = "###### model weights" + NL + "#lm order weight" + NL
+        + "WordPenalty -3.0476045270236662" + NL + createPhraseTableSpecificationString()
+        + "lm_0 1.3200621467242506"
+        // "#phrasemodel owner column(0-indexed)"
+        + NL + "tm_glue_0 1" + NL + "oovpenalty -100.0" + NL;
+    return retsult;
+  }
+
+  // private static final int NO_PHRASE_WEIGTHS = 22;
+
+  /*
+   * private static String createPhraseWeightsSpecification() { String result =
+   * "#phrasemodel owner column(0-indexed) weight" + NL; for (int i = 0; i < NO_PHRASE_WEIGTHS; i++)
+   * { result += "tm_pt_" + i + 0.5; } return result; }
+   */
+
+  private static String createFeatureWeightSpecifications(List<String> featureNames,
+      double featureWeight) {
+    String result = "";
+    for (String featureName : featureNames) {
+      result += featureName + " " + featureWeight + "\n";
+    }
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContentsWithExtraFeatures(String featureFunctionName,
+      List<String> featureNames) {
+    String result = createJoshuaConfigFileContents(featureFunctionName);
+    result += createFeatureWeightSpecifications(featureNames, NEW_FEATURES_WEIGHT);
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContents(String featureFunctionName) {
+    String result = getJoshuaConfigFileFirstPart(useSoftSyntacticDecoding);
+    result += NL + getFeatureSwitchOnString(featureFunctionName) + NL;
+    result += getMosesConfigFilePart2();
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContents() {
+    String result = getJoshuaConfigFileFirstPart(useSoftSyntacticDecoding);
+    result += NL;
+    result += getMosesConfigFilePart2();
+    return result;
+  }
+
+  protected static void writeContents(String filePath, String contents) {
+    BufferedWriter outputWriter = null;
+    try {
+      outputWriter = new BufferedWriter(new FileWriter(filePath));
+      outputWriter.write(contents);
+    } catch (IOException e) {
+      e.printStackTrace();
+      throw new RuntimeException(e);
+    } finally {
+      FileUtility.closeCloseableIfNotNull(outputWriter);
+    }
+  }
+
+  String createFullPath(String fileName) {
+    return testTempFilesFolderName + "/" + fileName;
+  }
+
+  protected void writeBasicJoshuaConfigFile(String configFileName) {
+    writeContents(createFullPath(configFileName), createJoshuaConfigFileContents());
+  }
+
+  protected void writeBasicJoshuaConfigFile(String configFileName, String featureFunctionName) {
+    writeContents(createFullPath(configFileName),
+        createJoshuaConfigFileContents(featureFunctionName));
+  }
+
+  protected void writeJoshuaExtraFeaturesConfigFile(String configFileName,
+      String featureFunctionName, List<String> featureNames) {
+    TestConfigFileCreater.writeContents(createFullPath(configFileName),
+        createJoshuaConfigFileContentsWithExtraFeatures(featureFunctionName, featureNames));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/TranslationsTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
new file mode 100644
index 0000000..9d2cb34
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import static org.testng.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeTest;
+import org.apache.joshua.decoder.io.TranslationRequestStream;
+import org.testng.annotations.AfterTest;
+
+public class TranslationsTest {
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+  @BeforeTest
+  public void beforeTest() {
+  }
+
+  @AfterTest
+  public void afterTest() {
+  }
+
+
+  @Test(enabled = false)
+  public void Translations() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#next()}.
+   */
+  @Test(enabled = false)
+  public void testNext() {
+    fail("Not yet implemented");
+  }
+
+  @Test(enabled = false)
+  public void iterator() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  // @Test(expectedExceptions = TestException.class)
+  @Test(enabled = false)
+  public void next() {
+    byte[] data = "1\n2\n".getBytes();
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    Translations translations = new Translations(request);
+    assertEquals(translations.next().getSourceSentence().source(), "1");
+    // Remove the next two.
+    assertEquals(translations.next().getSourceSentence().source(), "2");
+    // Should throw exception
+    translations.next();
+    translations.next();
+  }
+
+  @Test(enabled = false)
+  public void record() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  @Test(enabled = false)
+  public void remove() {
+    throw new RuntimeException("Test not implemented");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
new file mode 100644
index 0000000..9899298
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
@@ -0,0 +1,64 @@
+///*
+// * Licensed to the Apache Software Foundation (ASF) under one
+// * or more contributor license agreements.  See the NOTICE file
+// * distributed with this work for additional information
+// * regarding copyright ownership.  The ASF licenses this file
+// * to you under the Apache License, Version 2.0 (the
+// * "License"); you may not use this file except in compliance
+// * with the License.  You may obtain a copy of the License at
+// *
+// *  http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing,
+// * software distributed under the License is distributed on an
+// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// * KIND, either express or implied.  See the License for the
+// * specific language governing permissions and limitations
+// * under the License.
+// */
+//package org.apache.joshua.decoder.ff;
+//
+//import org.apache.joshua.decoder.ff.tm.BilingualRule;
+//import org.apache.joshua.decoder.ff.tm.MonolingualRule;
+//import org.apache.joshua.decoder.ff.tm.Rule;
+//
+//import org.testng.Assert;
+//import org.testng.annotations.Test;
+//
+///**
+// * Unit tests for ArityPhrasePenaltyFF.
+// * 
+// * @author Lane Schwartz
+// * @version $LastChangedDate$
+// */
+//public class ArityPhrasePenaltyFFTest {
+//
+//  @Test
+//  public void alpha() {
+//    Assert.assertEquals(ArityPhrasePenaltyFF.ALPHA, - Math.log10(Math.E));
+//  }
+//
+//  @Test
+//  public void estimate() {
+//
+//    int featureID = 0;
+//    double weight = 0.0;
+//    int owner = MonolingualRule.DUMMY_OWNER;
+//    int min = 1;
+//    int max = 5;
+//
+//    ArityPhrasePenaltyFF featureFunction = new ArityPhrasePenaltyFF(featureID, weight, owner, min, max);
+//
+//    int lhs = -1;
+//    int[] sourceRHS = {24, -1, 42, 738};
+//    int[] targetRHS = {-1, 7, 8};
+//    float[] featureScores = {-2.35f, -1.78f, -0.52f};
+//    int arity = 1;
+//
+//    Rule dummyRule = new BilingualRule(lhs, sourceRHS, targetRHS, featureScores, arity);
+//
+//    Assert.assertEquals(featureFunction.estimateLogP(dummyRule, -1), ArityPhrasePenaltyFF.ALPHA);
+//
+//  }
+//
+//}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
new file mode 100644
index 0000000..233a6ed
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley;
+import org.apache.joshua.decoder.ff.lm.buildin_lm.TrieLM;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for testing ARPA language model class.
+ * 
+ * @author Lane Schwartz
+ */
+public class ArpaFileTest {
+
+  String arpaFileName;
+
+  Vocabulary vocab;
+
+  @Test
+  public void setup() {
+
+    vocab = new Vocabulary();
+    vocab.id("a");
+    vocab.id("because");
+    vocab.id("boycott");
+    vocab.id("of");
+    vocab.id("parliament");
+    vocab.id("potato");
+    vocab.id("resumption");
+    vocab.id("the");
+
+    try {
+      File file = File.createTempFile("testLM", "arpa");
+      PrintStream out = new PrintStream(file, "UTF-8");
+
+      out.println();
+      out.println("\\data\\");
+      out.println("ngram 1=8");
+      out.println("ngram 2=4");
+      out.println("ngram 3=1");
+      out.println();
+
+      out.println("\\1-grams:");
+      out.println("-1.992672	a	-0.1195484");
+      out.println("-2.713723	because	-0.4665429");
+      out.println("-4.678545	boycott	-0.0902521");
+      out.println("-1.609573	of	-0.1991907");
+      out.println("-3.875917	parliament	-0.1274891");
+      out.println("-9.753210	potato");
+      out.println("-4.678545	resumption	-0.07945678");
+      out.println("-1.712444	the	-0.1606644");
+
+      out.println();
+      out.println("\\2-grams:");
+      out.println("-0.3552987	because of	-0.03083654");
+      out.println("-1.403534	of a");
+      out.println("-0.7507797	of the	-0.05237135");
+      out.println("-0.7266324	resumption of");
+      out.println("-3.936147	the resumption");
+
+      out.println();
+      out.println("\\3-grams:");
+      out.println("-0.6309999	because of the");
+      out.println();
+
+      out.println("\\end\\");
+
+      out.close();
+      this.arpaFileName = file.getAbsolutePath();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to create temporary file: " + e.toString());
+    }
+
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testOrder() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    try {
+      Assert.assertEquals(arpaFile.getOrder(), 3);
+    } catch (FileNotFoundException e) {
+      Assert.fail(e.toString());
+    }
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testIteration() {
+
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
+
+    boolean iterationOccurred = false;
+
+    for (ArpaNgram ngram : arpaFile) {
+
+      iterationOccurred = true;
+
+      int order = ngram.order();
+      //			System.err.println("Order = " + order);
+
+      int count;
+      if (counts.containsKey(order)) {
+        count = counts.get(order) + 1;
+      } else {
+        count = 1;
+      }
+
+      counts.put(order, count);
+
+    }
+
+    Assert.assertTrue(iterationOccurred);
+
+    Assert.assertTrue(counts.containsKey(1));
+    Assert.assertTrue(counts.containsKey(2));
+    Assert.assertTrue(counts.containsKey(3));
+
+    Assert.assertEquals((int) counts.get(1), 8);
+    Assert.assertEquals((int) counts.get(2), 5);
+    Assert.assertEquals((int) counts.get(3), 1);
+
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testSize() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    Assert.assertEquals(arpaFile.size(), 14);
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration" })
+  public void testChildren() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+    //		System.err.println(lm.getChildren().size());
+    Assert.assertNotSame(lm.getChildren().size(), 0);
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration", "testChildren" })
+  public void testTrie() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+
+    testLm(lm);
+
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration", "testChildren" })
+  public void testBerkeley() throws FileNotFoundException {
+
+    LMGrammarBerkeley lm = new LMGrammarBerkeley(3, arpaFileName);
+
+    testLm(lm);
+
+  }
+
+  /**
+   * @param lm
+   */
+  private void testLm(NGramLanguageModel lm) {
+    // Test unigrams known to be in the language model
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a")), -1.992672, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because")), -2.713723, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("boycott")), -4.678545, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of")), -1.609573, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("parliament")), -3.875917, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato")), -9.753210, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption")), -4.678545, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the")), -1.712444, 0.000001f);
+
+    // Test unigrams known to NOT be in the language model
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("banana")), -JoshuaConfiguration.lm_ceiling_cost, 0.000001f);
+
+    // Test bigrams known to be in the language model
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of")), -0.3552987, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the")), -0.7507797, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption of")), -0.7266324, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the resumption")), -3.936147, 0.000001f);
+
+    // Test trigrams known to be in the language model
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of the")), -0.6309999f, 0.000001f);
+
+    // Test bigrams know to NOT be in the language model (but the unigrams are)
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a boycott")), -4.678545f + -0.1195484f, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of parliament")), -3.875917f + -0.1991907f, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the potato")), -9.753210f + -0.1606644f, 0.000001f);
+//    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato parliament")), -3.875917f + -0.0f, 0.000001f);
+
+    // Test trigrams know to NOT be in the language model (but the bigrams are)
+//    int[] words = vocab.getIDs("because of a");
+//    double f = lm.ngramLogProbability(words);
+//    Assert.assertEquals(f, -1.403534f + -0.03083654f, 0.000001f);
+    //		//Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the parliament")), -3.875917f + -0.05237135f, 0.000001f);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
new file mode 100644
index 0000000..d541fdc
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm;
+
+import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.*;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
+
+public class LanguageModelFFTest {
+
+  private static final float WEIGHT = 0.5f;
+
+  private LanguageModelFF ff;
+
+  @Before
+  public void setUp() {
+    Decoder.resetGlobalState();
+
+    FeatureVector weights = new FeatureVector();
+    weights.set("lm_0", WEIGHT);
+    String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
+
+    JoshuaConfiguration config = new JoshuaConfiguration();
+    ff = new LanguageModelFF(weights, args, config);
+  }
+
+  @After
+  public void tearDown() {
+    Decoder.resetGlobalState();
+  }
+
+  @Test
+  public void givenNonStartSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int[] left = {3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.languageModel.sentenceLogProbability(left, 2, 1);
+    assertEquals(-99.0f, score, 0.0);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(score * WEIGHT, cost, 0.0);
+  }
+
+  @Test
+  public void givenOnlyStartSymbol_whenEstimateFutureCost_thenZeroResult() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    int[] left = {startSymbolId};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
+    assertEquals(0.0f, score, 0.0);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(score * WEIGHT, cost, 0.0);
+  }
+
+  @Test
+  public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    assertThat(startSymbolId, not(equalTo(3)));
+    int[] left = {startSymbolId, 3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
+    assertEquals(-100.752754f, score, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(score * WEIGHT, cost, 0.0f);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
new file mode 100644
index 0000000..bcc1039
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
+
+public class LMBerkeleySentenceProbablityTest {
+
+  @Test
+  public void verifySentenceLogProbability() {
+    LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+    grammar.registerWord("the", 2);
+    grammar.registerWord("chat-rooms", 3);
+    grammar.registerWord("<unk>", 0);
+
+    ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
+    float expected =
+        lm.getLogProb(new int[] {}, 0, 0)
+        + lm.getLogProb(new int[] {0}, 0, 1)
+        + lm.getLogProb(new int[] {0, 2}, 0, 2)
+        + lm.getLogProb(new int[] {2, 3}, 0, 2)
+        + lm.getLogProb(new int[] {3, 0}, 0, 2);
+
+    float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
+    assertEquals(expected, result, 0.0);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
new file mode 100644
index 0000000..e5b2d69
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.junit.After;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+/**
+ * Replacement for test/lm/berkeley/test.sh regression test
+ */
+@RunWith(value = Parameterized.class)
+public class LMGrammarBerkeleyTest {
+
+  private static final String INPUT = "the chat-rooms";
+  private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @Parameters
+  public static List<String> lmFiles() {
+    return Arrays.asList("resources/berkeley_lm/lm",
+        "resources/berkeley_lm/lm.gz",
+        "resources/berkeley_lm/lm.berkeleylm",
+        "resources/berkeley_lm/lm.berkeleylm.gz");
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+  }
+
+  @Parameter
+  public String lmFile;
+
+  @Test
+  public void verifyLM() {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.processCommandLineOptions(OPTIONS);
+    joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file " + lmFile);
+    decoder = new Decoder(joshuaConfig, null);
+    String translation = decode(INPUT).toString();
+    assertEquals(lmFile, "tm_glue_0=2.000 lm_0=-7.153\n", translation);
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+}