Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:27:20 UTC

[64/66] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

JOSHUA-252 Make it possible to use Maven to build Joshua


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/7f824b4e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/7f824b4e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/7f824b4e

Branch: refs/heads/JOSHUA-252
Commit: 7f824b4eceefc96813f38cb6bf8c4e5f404f5f44
Parents: f401535
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Sat May 14 13:53:40 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Sat May 14 13:53:40 2016 -0700

----------------------------------------------------------------------
 pom.xml                                         |    12 +
 .../apache/joshua/corpus/CorpusArrayTest.java   |   183 +
 .../java/org/apache/joshua/corpus/SpanTest.java |    47 +
 .../joshua/corpus/vocab/VocabularyTest.java     |   182 +
 .../ArtificialGrammarAndCorpusCreater.java      |   130 +
 .../joshua/decoder/DecoderThreadTest.java       |   175 +
 .../joshua/decoder/JoshuaDecoderTest.java       |    83 +
 .../joshua/decoder/TestConfigFileCreater.java   |   184 +
 .../apache/joshua/decoder/TranslationsTest.java |    87 +
 .../decoder/ff/ArityPhrasePenaltyFFTest.java    |    64 +
 .../joshua/decoder/ff/lm/ArpaFileTest.java      |   227 +
 .../decoder/ff/lm/LanguageModelFFTest.java      |     3 +-
 .../lm/berkeley_lm/LMGrammarBerkeleyTest.java   |     5 +-
 .../joshua/decoder/io/DeNormalizeTest.java      |   273 +
 .../decoder/io/TranslationRequestTest.java      |   149 +
 .../segment_file/AlmostTooLongSentenceTest.java |    96 +
 .../decoder/segment_file/SentenceTest.java      |   108 +
 .../java/org/apache/joshua/lattice/ArcTest.java |    86 +
 .../org/apache/joshua/lattice/LatticeTest.java  |   197 +
 .../org/apache/joshua/lattice/NodeTest.java     |   108 +
 .../org/apache/joshua/packed/Benchmark.java     |   122 +
 .../org/apache/joshua/packed/CountRules.java    |   110 +
 .../org/apache/joshua/packed/PrintRules.java    |   195 +
 src/test/java/org/apache/joshua/packed/README   |     6 +
 .../org/apache/joshua/packed/VocabTest.java     |    51 +
 .../java/org/apache/joshua/packed/packer.config |     6 +
 .../java/org/apache/joshua/packed/small_grammar | 20000 +++++++++++++++++
 src/test/java/org/apache/joshua/packed/test.sh  |    20 +
 .../joshua/system/StructuredOutputTest.java     |    13 +-
 .../ui/tree_visualizer/tree/TreeTest.java       |   111 +
 .../java/org/apache/joshua/util/BitsTest.java   |   187 +
 .../java/org/apache/joshua/util/CacheTest.java  |    53 +
 .../java/org/apache/joshua/util/CountsTest.java |    98 +
 .../org/apache/joshua/util/io/BinaryTest.java   |    75 +
 .../java/org/apache/joshua/zmert/BLEUTest.java  |   134 +
 test/joshua/corpus/CorpusArrayTest.java         |   176 -
 test/joshua/corpus/SpanTest.java                |    46 -
 test/joshua/corpus/vocab/VocabularyTest.java    |   184 -
 .../ArtificialGrammarAndCorpusCreater.java      |   112 -
 test/joshua/decoder/DecoderThreadTest.java      |   178 -
 test/joshua/decoder/JoshuaDecoderTest.java      |    65 -
 test/joshua/decoder/TestConfigFileCreater.java  |   166 -
 test/joshua/decoder/TranslationsTest.java       |    66 -
 .../decoder/ff/ArityPhrasePenaltyFFTest.java    |    63 -
 test/joshua/decoder/ff/lm/ArpaFileTest.java     |   228 -
 test/joshua/decoder/io/DeNormalizeTest.java     |   255 -
 .../decoder/io/TranslationRequestTest.java      |   123 -
 .../segment_file/AlmostTooLongSentenceTest.java |    78 -
 .../decoder/segment_file/SentenceTest.java      |    90 -
 test/joshua/lattice/ArcTest.java                |    82 -
 test/joshua/lattice/LatticeTest.java            |   194 -
 test/joshua/lattice/NodeTest.java               |   106 -
 .../ui/tree_visualizer/tree/TreeTest.java       |    93 -
 test/joshua/util/BitsTest.java                  |   186 -
 test/joshua/util/CacheTest.java                 |    35 -
 test/joshua/util/CountsTest.java                |    97 -
 test/joshua/util/io/BinaryTest.java             |    58 -
 test/joshua/zmert/BLEUTest.java                 |   133 -
 test/packed/Benchmark.java                      |   104 -
 test/packed/CountRules.java                     |    92 -
 test/packed/PrintRules.java                     |   177 -
 test/packed/README                              |     6 -
 test/packed/VocabTest.java                      |    33 -
 test/packed/packer.config                       |     6 -
 test/packed/small_grammar                       | 20000 -----------------
 test/packed/test.sh                             |    20 -
 66 files changed, 23570 insertions(+), 23262 deletions(-)
----------------------------------------------------------------------
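
The diffstat above captures the substance of the change: the legacy test/ tree (including the packed/ test resources) moves to Maven's standard src/test/java layout under the org.apache.joshua package, and pom.xml gains two test-scoped dependencies. As an illustrative sketch of the relocation (paths taken from the diffstat above, not an exhaustive list):

  test/joshua/corpus/CorpusArrayTest.java  ->  src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
  test/packed/small_grammar                ->  src/test/java/org/apache/joshua/packed/small_grammar

With sources in the standard layout, the suite can presumably be picked up by a plain "mvn test" run once the Surefire/TestNG wiring is complete.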


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index b309eb1..740a677 100644
--- a/pom.xml
+++ b/pom.xml
@@ -176,5 +176,17 @@
       <version>4.10</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.testng</groupId>
+      <artifactId>testng</artifactId>
+      <version>6.9.10</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <version>2.0.52-beta</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 </project>
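
The two new test-scoped dependencies bring in TestNG, which the migrated tests below use for their annotations and assertions, and Mockito. A minimal sketch of the TestNG style these tests follow; the class name and values here are illustrative only and are not part of this commit:

  package org.apache.joshua.example;

  import org.testng.Assert;
  import org.testng.annotations.Test;

  // Illustrative only: mirrors the @Test / Assert pattern used by the
  // migrated tests such as SpanTest and CorpusArrayTest.
  public class ExampleSketchTest {

    @Test
    public void spanLikeIteration() {
      // Count from 1 to 10 and check each value, much like SpanTest#iterator().
      int expected = 1;
      for (int actual = 1; actual <= 10; actual++) {
        Assert.assertEquals(actual, expected);
        expected++;
      }
    }
  }

Under Maven, a test class like this would presumably be executed by the Surefire plugin alongside the JUnit 4.10 dependency already declared above.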

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java b/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
new file mode 100644
index 0000000..e7653de
--- /dev/null
+++ b/src/test/java/org/apache/joshua/corpus/CorpusArrayTest.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+import java.util.logging.Logger;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class CorpusArrayTest {
+
+  /** Logger for this class. */
+  private static Logger logger =
+      Logger.getLogger(CorpusArrayTest.class.getName());
+
+  @Test
+  public void writePartsToDisk() {
+
+    String filename = "data/tiny.en";
+    int numSentences = 5;  // Should be 5 sentences in tiny.en
+    int numWords = 89;     // Should be 89 words in tiny.en
+
+
+    try {
+
+      // FIX: can't use createVocabulary(String) because we set numWords and numSentences
+      Vocabulary vocab = new Vocabulary();
+      SuffixArrayFactory.createVocabulary(filename, vocab);
+      CorpusArray corpus = SuffixArrayFactory.createCorpusArray(filename, vocab, numWords, numSentences);
+
+      corpus.writeWordIDsToFile(filename+".bin");
+      corpus.writeSentenceLengthsToFile(filename+".sbin");
+
+      MemoryMappedCorpusArray mmCorpus = new MemoryMappedCorpusArray(corpus.getVocabulary(), filename+".bin", numWords*4, filename+".sbin", numSentences*4);
+
+      // For each word in the corpus,
+      for (int i=0; i<corpus.size(); i++) {
+
+        // Verify that the memory-mapped corpus and the in-memory corpus have the same value
+        Assert.assertEquals(mmCorpus.getWordID(i), corpus.getWordID(i));
+      }
+
+
+      // For each sentence in the corpus
+      for (int i=0; i<corpus.sentences.length; i++) {
+
+        // Verify that the sentence position in the memory-mapped corpus and the in-memory corpus have the same value
+        Assert.assertEquals(corpus.getSentencePosition(i), mmCorpus.getSentencePosition(i));
+      }
+
+    } catch (IOException e) {
+      Assert.fail(e.getLocalizedMessage());
+    }
+
+  }
+
+  @Test
+  public void iterate() {
+
+    String[] sentences = {
+        "scientists complete sequencing of the chromosome linked to early dementia",
+        "( afp , paris , january 2 ) an international team of scientists said that they have completed the sequencing of human chromosome 14 that is linked to many diseases , including the early-onset alzheimer's that may strike people in their 30s .",
+        "this is the fourth chromosome whose sequence has been completed to date . it comprises more than 87 million pairs of dna .",
+        "this study published in the weekly british scientific journal nature illustrates that the sequence of chromosome 14 comprises 1,050 genes and gene fragments .",
+        "the goal of geneticists is to provide diagnostic tools to identify defective genes that cause diseases so as to arrive eventually at treatments that can prevent those genes from malfunctioning ."
+    };
+
+
+
+    // Tell System.out and System.err to use UTF8
+    FormatUtil.useUTF8();
+
+    try {
+
+      File sourceFile = File.createTempFile("source", new Date().toString());
+      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+      for (String sentence : sentences) {
+        sourcePrintStream.println(sentence);
+      }
+      sourcePrintStream.close();
+      String corpusFileName = sourceFile.getAbsolutePath();
+
+      Vocabulary vocabulary;
+
+      logger.fine("Constructing vocabulary from file " + corpusFileName);
+      vocabulary = new Vocabulary();
+      int[] lengths = Vocabulary.initializeVocabulary(corpusFileName, vocabulary, true);
+
+      logger.fine("Constructing corpus array from file " + corpusFileName);
+      Corpus corpus = SuffixArrayFactory.createCorpusArray(corpusFileName, vocabulary, lengths[0], lengths[1]);
+
+      int expectedIndex = 0;
+      for (int actualIndex : corpus.corpusPositions()) {
+        Assert.assertEquals(actualIndex, expectedIndex);
+        expectedIndex += 1;
+      }
+
+      Assert.assertEquals(corpus.size(), expectedIndex);
+
+
+    } catch (IOException e) {
+      Assert.fail("Unable to write temporary file. " + e.toString());
+    }
+
+
+
+  }
+
+
+  @Test
+  public void writeAllToDisk() throws ClassNotFoundException {
+
+    String filename = "data/tiny.en";
+    int numSentences = 5;  // Should be 5 sentences in tiny.en
+    int numWords = 89;     // Should be 89 words in tiny.en
+
+
+    try {
+
+      // FIX: can't use createVocabulary(String) because we set numWords and numSentences
+      Vocabulary vocab = new Vocabulary();
+      Vocabulary.initializeVocabulary(filename, vocab, true);
+      CorpusArray corpus = SuffixArrayFactory.createCorpusArray(filename, vocab, numWords, numSentences);
+
+      corpus.write(filename+".corpus", filename+".vocab", "UTF-8");
+
+      MemoryMappedCorpusArray mmCorpus = new MemoryMappedCorpusArray(filename+".corpus", filename+".vocab");
+
+      Assert.assertEquals(mmCorpus.size(), corpus.size());
+      Assert.assertEquals(mmCorpus.getNumSentences(), corpus.getNumSentences());
+
+      // For each word in the corpus,
+      for (int i=0; i<corpus.size(); i++) {
+
+        // Verify that the memory-mapped corpus and the in-memory corpus have the same value
+        Assert.assertEquals(mmCorpus.getWordID(i), corpus.getWordID(i));
+      }
+
+
+      // For each sentence in the corpus
+      for (int i=0; i<corpus.sentences.length; i++) {
+
+        // Verify that the sentence start position in the memory-mapped corpus and the in-memory corpus have the same value
+        Assert.assertEquals(mmCorpus.getSentencePosition(i), corpus.getSentencePosition(i));
+
+        // Verify that the sentence end position in the memory-mapped corpus and the in-memory corpus have the same value
+        Assert.assertEquals(mmCorpus.getSentenceEndPosition(i), corpus.getSentenceEndPosition(i));
+
+        // Verify that the phrase corresponding to this sentence is the same
+        Phrase sentence = corpus.getSentence(i);
+        Phrase mmSentence = mmCorpus.getSentence(i);
+        Assert.assertNotNull(sentence);
+        Assert.assertNotNull(mmSentence);
+        Assert.assertEquals(mmSentence, sentence);
+      }
+
+    } catch (IOException e) {
+      Assert.fail(e.getLocalizedMessage());
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/corpus/SpanTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/corpus/SpanTest.java b/src/test/java/org/apache/joshua/corpus/SpanTest.java
new file mode 100644
index 0000000..3558b79
--- /dev/null
+++ b/src/test/java/org/apache/joshua/corpus/SpanTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.apache.joshua.corpus.Span;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ *
+ * 
+ * @author Lane Schwartz
+ */
+public class SpanTest {
+
+  @Test
+  public void iterator() {
+
+    Span span = new Span(1,10);
+
+    int expected = 1;
+
+    for (int actual : span) {
+      Assert.assertEquals(actual, expected);
+      expected++;
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java b/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
new file mode 100644
index 0000000..2db9519
--- /dev/null
+++ b/src/test/java/org/apache/joshua/corpus/vocab/VocabularyTest.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus.vocab;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+import java.util.HashSet;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ *
+ * 
+ * @author Lane Schwartz
+ */
+public class VocabularyTest {
+
+  /** [X], [X,1], [X,2], [S], [S,1], <unk>, <s>, </s>, -pau- */
+  int numBuiltInSymbols = 9;
+
+  /** <unk>, <s>, </s>, -pau- */
+  int numBuiltInTerminals = 4;
+
+  @Test
+  public void basicVocabTest() {
+
+    Vocabulary vocab1 = new Vocabulary();
+    Vocabulary vocab2 = new Vocabulary(new HashSet<String>());
+
+    Assert.assertEquals(vocab1, vocab2);
+
+    Assert.assertFalse(vocab1.intToString.isEmpty());
+    //		Assert.assertTrue(vocab1.intToString.get(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    Assert.assertFalse(vocab1.getWords().isEmpty());
+    Assert.assertTrue(vocab1.getWord(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    Assert.assertEquals(vocab1.getWords(), vocab1.intToString.values());
+
+    Assert.assertEquals(vocab1.size(), numBuiltInSymbols);
+    Assert.assertEquals(vocab1.getWord(Vocabulary.UNKNOWN_WORD), Vocabulary.UNKNOWN_WORD_STRING);
+
+    //Assert.assertEquals(vocab1.getID("sample"), Vocabulary.UNKNOWN_WORD);
+    //Assert.assertEquals(vocab1.getID(null), Vocabulary.UNKNOWN_WORD);
+
+    Assert.assertFalse(vocab1.terminalToInt.isEmpty());
+    Assert.assertEquals(vocab1.terminalToInt.size(), this.numBuiltInTerminals);
+    //		Assert.assertFalse(vocab1.isFixed);
+    //		
+    //		vocab1.fixVocabulary();
+    //		Assert.assertTrue(vocab1.isFixed);
+
+    Assert.assertEquals(vocab1.getID(Vocabulary.X_STRING), -1);
+    Assert.assertEquals(vocab1.getID(Vocabulary.X1_STRING), -2);
+    Assert.assertEquals(vocab1.getID(Vocabulary.X2_STRING), -3);
+
+    Assert.assertEquals(vocab1.getWord(-1), Vocabulary.X_STRING);
+    Assert.assertEquals(vocab1.getWord(-2), Vocabulary.X1_STRING);
+    Assert.assertEquals(vocab1.getWord(-3), Vocabulary.X2_STRING);
+
+
+
+    Assert.assertFalse(vocab2.intToString.isEmpty());
+    //		Assert.assertTrue(vocab2.intToString.get(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    Assert.assertFalse(vocab2.getWords().isEmpty());
+    //		Assert.assertTrue(vocab2.getWord(0)==Vocabulary.UNKNOWN_WORD_STRING);
+    Assert.assertEquals(vocab2.getWords(), vocab2.intToString.values());
+
+    Assert.assertEquals(vocab2.size(), numBuiltInSymbols);
+    Assert.assertEquals(vocab2.getWord(Vocabulary.UNKNOWN_WORD), Vocabulary.UNKNOWN_WORD_STRING);
+
+    //		Assert.assertEquals(vocab2.getID("sample"), Vocabulary.UNKNOWN_WORD);
+    //		Assert.assertEquals(vocab2.getID(null), Vocabulary.UNKNOWN_WORD);
+
+    Assert.assertFalse(vocab2.terminalToInt.isEmpty());
+    Assert.assertEquals(vocab2.terminalToInt.size(), this.numBuiltInTerminals);
+    //		Assert.assertTrue(vocab2.isFixed);
+
+
+
+  }
+
+  @Test
+  public void verifyWordIDs() throws IOException {
+
+    // Adam Lopez's example...
+    String corpusString = "it makes him and it mars him , it sets him on and it takes him off .";
+    //		String queryString = "it persuades him and it disheartens him";
+
+    String sourceFileName;
+    {
+      File sourceFile = File.createTempFile("source", new Date().toString());
+      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+      sourcePrintStream.println(corpusString);
+      sourcePrintStream.close();
+      sourceFileName = sourceFile.getAbsolutePath();
+    }
+
+    Vocabulary vocab = new Vocabulary();
+    Vocabulary.initializeVocabulary(sourceFileName, vocab, true);
+
+    Assert.assertEquals(vocab.getWord(vocab.getID("it")), "it");
+    Assert.assertEquals(vocab.getWord(vocab.getID("makes")), "makes");
+    Assert.assertEquals(vocab.getWord(vocab.getID("him")), "him");
+    Assert.assertEquals(vocab.getWord(vocab.getID("and")), "and");
+    Assert.assertEquals(vocab.getWord(vocab.getID("mars")), "mars");
+    Assert.assertEquals(vocab.getWord(vocab.getID(",")), ",");
+    Assert.assertEquals(vocab.getWord(vocab.getID("sets")), "sets");
+    Assert.assertEquals(vocab.getWord(vocab.getID("on")), "on");
+    Assert.assertEquals(vocab.getWord(vocab.getID("takes")), "takes");
+    Assert.assertEquals(vocab.getWord(vocab.getID("off")), "off");
+
+    //		Assert.assertEquals(vocab.getWord(vocab.getID("persuades")), Vocabulary.UNKNOWN_WORD_STRING);
+    //		Assert.assertEquals(vocab.getWord(vocab.getID("disheartens")), Vocabulary.UNKNOWN_WORD_STRING);
+  }
+
+  @Test
+  public void loadVocabFromFile() {
+
+    String filename = "data/tiny.en";
+    int numSentences = 5;  // Should be 5 sentences in tiny.en
+    int numWords = 89;     // Should be 89 words in tiny.en
+    int numUniqWords = 60; // Should be 60 unique words in tiny.en
+
+    Vocabulary vocab = new Vocabulary();
+    Vocabulary vocab2 = new Vocabulary();
+
+    Assert.assertTrue(vocab.equals(vocab2));
+    Assert.assertTrue(vocab2.equals(vocab));
+    Assert.assertEquals(vocab, vocab2);
+
+    try {
+      int[] result = Vocabulary.initializeVocabulary(filename, vocab, true);
+      Assert.assertNotNull(result);
+      Assert.assertEquals(result.length, 2);
+      Assert.assertEquals(result[0], numWords); 
+      Assert.assertEquals(result[1], numSentences);  
+
+      //			Assert.assertTrue(vocab.isFixed);
+      Assert.assertEquals(vocab.size(), numUniqWords+numBuiltInSymbols);
+
+    } catch (IOException e) {
+      Assert.fail("Could not load file " + filename);
+    }
+
+    Assert.assertFalse(vocab.equals(vocab2));
+
+    try {
+      int[] result = Vocabulary.initializeVocabulary(filename, vocab2, true);
+      Assert.assertNotNull(result);
+      Assert.assertEquals(result.length, 2);
+      Assert.assertEquals(result[0], numWords); 
+      Assert.assertEquals(result[1], numSentences);  
+
+      //			Assert.assertTrue(vocab2.isFixed);
+      Assert.assertEquals(vocab2.size(), numUniqWords+numBuiltInSymbols);
+
+    } catch (IOException e) {
+      Assert.fail("Could not load file " + filename);
+    }
+
+    Assert.assertEquals(vocab, vocab2);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java b/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
new file mode 100644
index 0000000..5cc5996
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/ArtificialGrammarAndCorpusCreater.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.joshua.util.FileUtility;
+
+public class ArtificialGrammarAndCorpusCreater {
+
+  private static final String JOSHUA_RULE_SEPARATOR = " ||| ";
+  private static final String ARTIFICAL_TERMINAL_RULE1 = "[T1]" + JOSHUA_RULE_SEPARATOR + "garcon"
+      + JOSHUA_RULE_SEPARATOR + "boy" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_TERMINAL_RULE2 = "[T2]" + JOSHUA_RULE_SEPARATOR + "fille"
+      + JOSHUA_RULE_SEPARATOR + "girl" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_TERMINAL_RULE3 = "[T3]" + JOSHUA_RULE_SEPARATOR + "garcon"
+      + JOSHUA_RULE_SEPARATOR + "mister" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_TERMINAL_RULE4 = "[T4]" + JOSHUA_RULE_SEPARATOR + "fille"
+      + JOSHUA_RULE_SEPARATOR + "woman" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_TERMINAL_RULE5 = "[T5]" + JOSHUA_RULE_SEPARATOR + "fille"
+      + JOSHUA_RULE_SEPARATOR + "lady" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_NONTERTERMINAL_RULE1 = "[NT1]" + JOSHUA_RULE_SEPARATOR
+      + "le [T1,1] aime la [T2,2]" + JOSHUA_RULE_SEPARATOR + "the [T1,1] loves the [T2,2]"
+      + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_NONTERTERMINAL_RULE_INVERTED = "[NT1]"
+      + JOSHUA_RULE_SEPARATOR + "le [T1,1] aime la [T2,2]" + JOSHUA_RULE_SEPARATOR
+      + "the [T2,2] loves the [T1,1]" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+  private static final String ARTIFICAL_TERMINAL_RULE6 = "[T6]" + JOSHUA_RULE_SEPARATOR + "garcon"
+      + JOSHUA_RULE_SEPARATOR + "sir" + JOSHUA_RULE_SEPARATOR + "0.5 0.4";
+
+  private static final String GLUE_RULE_BEGIN = "[GOAL] ||| <s> ||| <s> ||| 0";
+  private static final String GLUE_RULE_NT = "[GOAL] ||| [GOAL,1] [NT1,2] ||| [GOAL,1] [NT1,2] ||| -1";
+  private static final String GLUE_RULE_END = "[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0";
+
+  private static final String TEST_SENTENCE1 = "le garcon aime la fille";
+
+  private static final List<String> getArtificalGrammarsList1() {
+    List<String> result = Arrays.asList(ARTIFICAL_TERMINAL_RULE1, ARTIFICAL_TERMINAL_RULE2,
+        ARTIFICAL_TERMINAL_RULE3, ARTIFICAL_TERMINAL_RULE4, ARTIFICAL_TERMINAL_RULE5,
+        ARTIFICAL_TERMINAL_RULE6, ARTIFICAL_NONTERTERMINAL_RULE1);
+    return result;
+  }
+
+  private static List<String> getArtificalGrammarsList2() {
+    List<String> result = new ArrayList<String>(getArtificalGrammarsList1());
+    result.add(ARTIFICAL_NONTERTERMINAL_RULE_INVERTED);
+    return result;
+  }
+
+  private static final List<String> ARTIFICIAL_GLUE_GRAMMAR_RULES_LIST = Arrays.asList(
+      GLUE_RULE_BEGIN, GLUE_RULE_NT, GLUE_RULE_END);
+
+  private final String mainGrammarFilePath;
+  private final String glueGrammarFilePath;
+  private final String testSentencesFilePath;
+
+  private ArtificialGrammarAndCorpusCreater(String mainGrammarFilePath, String glueGrammarFilePath,
+      String testSentencesFilePath) {
+    this.mainGrammarFilePath = mainGrammarFilePath;
+    this.glueGrammarFilePath = glueGrammarFilePath;
+    this.testSentencesFilePath = testSentencesFilePath;
+  }
+
+  public static ArtificialGrammarAndCorpusCreater createArtificialGrammarAndCorpusCreater(
+      String mainGrammarFilePath, String glueGrammarFilePath, String testSentencesFilePath) {
+    return new ArtificialGrammarAndCorpusCreater(mainGrammarFilePath, glueGrammarFilePath,
+        testSentencesFilePath);
+  }
+
+  private static final void writeFile(String filePath, List<String> lines) {
+    BufferedWriter outputWriter = null;
+    try {
+      outputWriter = new BufferedWriter(new FileWriter(filePath));
+      for (int i = 0; i < lines.size() - 1; i++) {
+        outputWriter.write(lines.get(i) + "\n");
+      }
+      if (!lines.isEmpty()) {
+        outputWriter.write(lines.get(lines.size() - 1));
+      }
+    } catch (IOException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    } finally {
+      FileUtility.closeCloseableIfNotNull(outputWriter);
+    }
+  }
+
+  protected final void writeMainGrammar(boolean includeInvertingNonterminalRule) {
+    List<String> ruleList;
+    if(includeInvertingNonterminalRule)
+    {
+      ruleList = getArtificalGrammarsList2();
+    }
+    else{
+      ruleList = getArtificalGrammarsList1();
+    }
+
+    writeFile(mainGrammarFilePath,ruleList);
+  }
+
+  protected final void writeGlueGrammar() {
+    writeFile(glueGrammarFilePath, ARTIFICIAL_GLUE_GRAMMAR_RULES_LIST);
+  }
+
+  protected final void writeTestSentencesFile1() {
+    writeFile(testSentencesFilePath, Arrays.asList(TEST_SENTENCE1));
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java b/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
new file mode 100644
index 0000000..0631412
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/DecoderThreadTest.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+import java.util.Scanner;
+
+import org.apache.joshua.corpus.Corpus;
+import org.apache.joshua.corpus.Vocabulary;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for decoder thread.
+ * 
+ * @author Lane Schwartz
+ * @version $LastChangedDate$
+ */
+public class DecoderThreadTest {
+
+  @Test
+  public void setup() {
+
+    String[] sourceSentences = {
+        "a b c d",
+        "a b c d",
+        "a b c d"
+    };
+
+    String[] targetSentences = {
+        "w x y z",
+        "w t u v",
+        "s x y z"
+    };
+
+    String[] alignmentLines = {
+        "0-0 1-1 2-2 3-3",
+        "0-0 1-1 2-2 3-3",
+        "0-0 1-1 2-2 3-3"
+    };
+
+    String[] testSentences = {
+        "a b c"	
+    };
+
+    try {
+
+      // Set up source corpus
+      File sourceFile = File.createTempFile("source", new Date().toString());
+      PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8");
+      for (String sentence : sourceSentences) {
+        sourcePrintStream.println(sentence);
+      }
+      sourcePrintStream.close();
+      String sourceCorpusFileName = sourceFile.getAbsolutePath();
+
+      Vocabulary vocabulary = new Vocabulary();
+      int[] sourceLengths = Vocabulary.initializeVocabulary(sourceCorpusFileName, vocabulary, true);
+      Assert.assertEquals(sourceLengths.length, 2);
+      int numberOfSentences = sourceLengths[1];
+
+      Corpus sourceCorpus = SuffixArrayFactory.createCorpusArray(sourceCorpusFileName, vocabulary, sourceLengths[0], sourceLengths[1]);
+
+
+      // Set up target corpus
+      File targetFile = File.createTempFile("target", new Date().toString());
+      PrintStream targetPrintStream = new PrintStream(targetFile, "UTF-8");
+      for (String sentence : targetSentences) {
+        targetPrintStream.println(sentence);
+      }
+      targetPrintStream.close();
+      String targetCorpusFileName = targetFile.getAbsolutePath();
+
+      int[] targetLengths = Vocabulary.initializeVocabulary(targetCorpusFileName, vocabulary, true);
+      Assert.assertEquals(targetLengths.length, sourceLengths.length);
+      for (int i=0, n=targetLengths.length; i<n; i++) {
+        Assert.assertEquals(targetLengths[i], sourceLengths[i]);
+      }
+
+      Corpus targetCorpus = SuffixArrayFactory.createCorpusArray(targetCorpusFileName, vocabulary, targetLengths[0], targetLengths[1]);
+
+
+      // Construct alignments data structure
+      File alignmentsFile = File.createTempFile("alignments", new Date().toString());
+      PrintStream alignmentsPrintStream = new PrintStream(alignmentsFile, "UTF-8");
+      for (String sentence : alignmentLines) {
+        alignmentsPrintStream.println(sentence);
+      }
+      alignmentsPrintStream.close();
+      String alignmentFileName = alignmentsFile.getAbsolutePath();
+
+      AlignmentGrids grids = new AlignmentGrids(
+          new Scanner(alignmentsFile), 
+          sourceCorpus, 
+          targetCorpus, 
+          numberOfSentences);
+
+
+      // Set up test corpus
+      File testFile = File.createTempFile("test", new Date().toString());
+      PrintStream testPrintStream = new PrintStream(testFile, "UTF-8");
+      for (String sentence : testSentences) {
+        testPrintStream.println(sentence);
+      }
+      testPrintStream.close();
+      String testFileName = testFile.getAbsolutePath();
+
+      // Filename of the extracted rules file.
+      String rulesFileName; {	
+        File rulesFile = File.createTempFile("rules", new Date().toString());
+        rulesFileName = rulesFile.getAbsolutePath();
+      }
+
+      String joshDirName; {
+        File joshDir = File.createTempFile(new Date().toString(), "josh");
+        joshDirName = joshDir.getAbsolutePath();
+        joshDir.delete();
+      }
+
+
+      Compile compileJoshDir = new Compile();
+      compileJoshDir.setSourceCorpus(sourceCorpusFileName);
+      compileJoshDir.setTargetCorpus(targetCorpusFileName);
+      compileJoshDir.setAlignments(alignmentFileName);
+      compileJoshDir.setOutputDir(joshDirName);
+      compileJoshDir.execute();
+
+      ExtractRules extractRules = new ExtractRules();
+      extractRules.setJoshDir(joshDirName);
+      extractRules.setTestFile(testFileName);
+      extractRules.setOutputFile(rulesFileName);
+      extractRules.execute();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to write temporary file. " + e.toString());
+    } catch (ClassNotFoundException e) {
+      Assert.fail("Unable to extract rules. " + e.toString());
+    }
+  }
+
+  @Test
+  public void basicSuffixArrayGrammar() {
+
+    // Write configuration to temp file on disk
+    //		String configFile;
+
+
+    //		JoshuaDecoder decoder = 
+    //			JoshuaDecoder.getUninitalizedDecoder(configFile);
+
+
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java b/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
new file mode 100644
index 0000000..2a878f3
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/JoshuaDecoderTest.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Scanner;
+
+import org.testng.Assert;
+import org.testng.annotations.Parameters;
+import org.testng.annotations.Test;
+
+/**
+ * Performs regression tests to verify that the decoder produces expected output
+ * on known data sets.
+ * 
+ * @author Lane Schwartz
+ */
+public class JoshuaDecoderTest {
+
+  @Parameters({ "configFile", "sourceInput", "referenceOutput" })
+  @Test
+  public void regressionTest(String configFile, String sourceInput, String referenceOutput)
+      throws IOException {
+
+    File referenceFile = new File(referenceOutput);
+    File output = File.createTempFile("output", null);// ,
+                                                      // referenceFile.getParentFile());
+
+    String[] args = { configFile, sourceInput, output.getAbsoluteFile().toString() };
+    JoshuaDecoder.main(args);
+
+    Scanner resultScanner = new Scanner(output);
+    Scanner refScanner = new Scanner(referenceFile);
+
+    while (resultScanner.hasNextLine() && refScanner.hasNextLine()) {
+
+      String resultLine = resultScanner.nextLine();
+      String refLine = refScanner.nextLine();
+
+      String[] resultParts = resultLine.split(" \\|\\|\\| ");
+      String[] refParts = refLine.split(" \\|\\|\\| ");
+
+      Assert.assertEquals(resultParts.length, 4);
+      Assert.assertEquals(refParts.length, 4);
+
+      Assert.assertEquals(Integer.parseInt(resultParts[0]), Integer.parseInt(refParts[0]));
+      Assert.assertEquals(resultParts[1], refParts[1]);
+
+      String[] resultFeatures = resultParts[2].split(" ");
+      String[] refFeatures = refParts[2].split(" ");
+
+      Assert.assertEquals(resultFeatures.length, 5);
+      Assert.assertEquals(refFeatures.length, 5);
+
+      float acceptableDelta = 0.001f;
+      for (int i = 0; i < refFeatures.length; i++) {
+        Assert.assertEquals(Float.valueOf(resultFeatures[i]), Float.valueOf(refFeatures[i]),
+            acceptableDelta);
+      }
+    }
+    
+    resultScanner.close();
+    refScanner.close();
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java b/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
new file mode 100644
index 0000000..5399bab
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/TestConfigFileCreater.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.List;
+import org.apache.joshua.util.FileUtility;
+
+public class TestConfigFileCreater {
+
+
+  protected static String LANGUAGE_MODEL_FILE_NAME = "lm.gz";
+  private static final String NL = "\n";
+  private static final Double NEW_FEATURES_WEIGHT = 0.2;
+
+  private final String testTempFilesFolderName;
+  private final String mainGrammarFileName;
+  private final String glueGrammarFileName;
+  private final List<Double> phraseTableWeights;
+  private final boolean useSoftSyntacticDecoding;
+  private final boolean switchOfPruning;
+
+  private TestConfigFileCreater(String testTemFilesFolderName, String mainGrammarFileName,
+      String glueGrammarFileName, List<Double> phraseTableWeights,
+      boolean useSoftSyntacticDecoding, boolean switchOfPruning) {
+    this.testTempFilesFolderName = testTemFilesFolderName;
+    this.mainGrammarFileName = mainGrammarFileName;
+    this.glueGrammarFileName = glueGrammarFileName;
+    this.phraseTableWeights = phraseTableWeights;
+    this.useSoftSyntacticDecoding = useSoftSyntacticDecoding;
+    this.switchOfPruning = switchOfPruning;
+  }
+
+  public static TestConfigFileCreater createFeaturesTestConfigFileCreater(
+      String testTemFilesFolderName, String mainGrammarFileName, String glueGrammarFileName,
+
+      List<Double> phraseTableWeights, boolean useSoftSyntacticDecoding, boolean switchOfPruning) {
+    return new TestConfigFileCreater(testTemFilesFolderName, mainGrammarFileName,
+        glueGrammarFileName, phraseTableWeights, useSoftSyntacticDecoding, switchOfPruning);
+  }
+
+  private final String createGlueGrammarFileSpecificationLine() {
+    return "tm = thrax glue -1 " + "./" + testTempFilesFolderName + "/" + glueGrammarFileName;
+  }
+
+  private final String createMainGrammarFileSpecificationLine() {
+    return "tm = thrax pt 12 " + "./" + testTempFilesFolderName + "/" + mainGrammarFileName;
+  }
+
+  private static String getFeatureSwitchOnString(String featureFunctionName) {
+    return "feature-function = " + featureFunctionName;
+  }
+
+  public String getPruningSpecification() {
+    if (switchOfPruning) {
+      return "pop-limit = 0" + NL;
+    } else {
+      return "pop-limit = 100" + NL;
+    }
+  }
+
+  // Large String containing the mostly static, partly dynamically generated Moses config
+  // file contents used for the test
+  private final String getJoshuaConfigFileFirstPart(boolean useSoftSyntacticDecoding) {
+    String result = "lm = kenlm 5 false false 100 " + createFullPath(LANGUAGE_MODEL_FILE_NAME) + NL
+        + createMainGrammarFileSpecificationLine() + NL + createGlueGrammarFileSpecificationLine()
+        + NL + "mark_oovs=false" + NL + "#tm config" + NL + "default_non_terminal = OOV" + NL
+        + "goalSymbol = GOAL" + NL + "#pruning config" + NL + getPruningSpecification()
+        + JoshuaConfiguration.SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME + " = "
+        + useSoftSyntacticDecoding + NL + "#nbest config" + NL + "use_unique_nbest = true" + NL
+
+        + "top_n = 100" // + NL +
+        // "feature-function = OOVPenalty"
+        + NL + "feature-function = WordPenalty";
+    return result;
+  }
+
+  private final String createPhraseTableSpecificationString() {
+    String result = "";
+    for (int i = 0; i < phraseTableWeights.size(); i++) {
+      double phraseTableWeight = phraseTableWeights.get(i);
+      result += "tm_pt_" + i + " " + phraseTableWeight + NL;
+    }
+    return result;
+  }
+
+  private final String getMosesConfigFilePart2() {
+    String result = "###### model weights" + NL + "#lm order weight" + NL
+        + "WordPenalty -3.0476045270236662" + NL + createPhraseTableSpecificationString()
+        + "lm_0 1.3200621467242506"
+        // "#phrasemodel owner column(0-indexed)"
+        + NL + "tm_glue_0 1" + NL + "oovpenalty -100.0" + NL;
+    return result;
+  }
+
+  // private static final int NO_PHRASE_WEIGTHS = 22;
+
+  /*
+   * private static String createPhraseWeightsSpecification() { String result =
+   * "#phrasemodel owner column(0-indexed) weight" + NL; for (int i = 0; i < NO_PHRASE_WEIGTHS; i++)
+   * { result += "tm_pt_" + i + 0.5; } return result; }
+   */
+
+  private static String createFeatureWeightSpecifications(List<String> featureNames,
+      double featureWeight) {
+    String result = "";
+    for (String featureName : featureNames) {
+      result += featureName + " " + featureWeight + "\n";
+    }
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContentsWithExtraFeatures(String featureFunctionName,
+      List<String> featureNames) {
+    String result = createJoshuaConfigFileContents(featureFunctionName);
+    result += createFeatureWeightSpecifications(featureNames, NEW_FEATURES_WEIGHT);
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContents(String featureFunctionName) {
+    String result = getJoshuaConfigFileFirstPart(useSoftSyntacticDecoding);
+    result += NL + getFeatureSwitchOnString(featureFunctionName) + NL;
+    result += getMosesConfigFilePart2();
+    return result;
+  }
+
+  protected String createJoshuaConfigFileContents() {
+    String result = getJoshuaConfigFileFirstPart(useSoftSyntacticDecoding);
+    result += NL;
+    result += getMosesConfigFilePart2();
+    return result;
+  }
+
+  protected static void writeContents(String filePath, String contents) {
+    BufferedWriter outputWriter = null;
+    try {
+      outputWriter = new BufferedWriter(new FileWriter(filePath));
+      outputWriter.write(contents);
+    } catch (IOException e) {
+      e.printStackTrace();
+      throw new RuntimeException(e);
+    } finally {
+      FileUtility.closeCloseableIfNotNull(outputWriter);
+    }
+  }
+
+  String createFullPath(String fileName) {
+    return testTempFilesFolderName + "/" + fileName;
+  }
+
+  protected void writeBasicJoshuaConfigFile(String configFileName) {
+    writeContents(createFullPath(configFileName), createJoshuaConfigFileContents());
+  }
+
+  protected void writeBasicJoshuaConfigFile(String configFileName, String featureFunctionName) {
+    writeContents(createFullPath(configFileName),
+        createJoshuaConfigFileContents(featureFunctionName));
+  }
+
+  protected void writeJoshuaExtraFeaturesConfigFile(String configFileName,
+      String featureFunctionName, List<String> featureNames) {
+    TestConfigFileCreater.writeContents(createFullPath(configFileName),
+        createJoshuaConfigFileContentsWithExtraFeatures(featureFunctionName, featureNames));
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/TranslationsTest.java b/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
new file mode 100644
index 0000000..9d2cb34
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/TranslationsTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import static org.testng.Assert.*;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeTest;
+import org.apache.joshua.decoder.io.TranslationRequestStream;
+import org.testng.annotations.AfterTest;
+
+public class TranslationsTest {
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+  @BeforeTest
+  public void beforeTest() {
+  }
+
+  @AfterTest
+  public void afterTest() {
+  }
+
+
+  @Test(enabled = false)
+  public void Translations() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#next()}.
+   */
+  @Test(enabled = false)
+  public void testNext() {
+    fail("Not yet implemented");
+  }
+
+  @Test(enabled = false)
+  public void iterator() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  // @Test(expectedExceptions = TestException.class)
+  @Test(enabled = false)
+  public void next() {
+    byte[] data = "1\n2\n".getBytes();
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    Translations translations = new Translations(request);
+    assertEquals(translations.next().getSourceSentence().source(), "1");
+    // Remove the next two.
+    assertEquals(translations.next().getSourceSentence().source(), "2");
+    // Should throw exception
+    translations.next();
+    translations.next();
+  }
+
+  @Test(enabled = false)
+  public void record() {
+    throw new RuntimeException("Test not implemented");
+  }
+
+  @Test(enabled = false)
+  public void remove() {
+    throw new RuntimeException("Test not implemented");
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java b/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
new file mode 100644
index 0000000..2e4b78b
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/ff/ArityPhrasePenaltyFFTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+import org.apache.joshua.decoder.ff.tm.BilingualRule;
+import org.apache.joshua.decoder.ff.tm.MonolingualRule;
+import org.apache.joshua.decoder.ff.tm.Rule;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for ArityPhrasePenaltyFF.
+ * 
+ * @author Lane Schwartz
+ * @version $LastChangedDate$
+ */
+public class ArityPhrasePenaltyFFTest {
+
+  @Test
+  public void alpha() {
+    Assert.assertEquals(ArityPhrasePenaltyFF.ALPHA, - Math.log10(Math.E));
+  }
+
+  @Test
+  public void estimate() {
+
+    int featureID = 0;
+    double weight = 0.0;
+    int owner = MonolingualRule.DUMMY_OWNER;
+    int min = 1;
+    int max = 5;
+
+    ArityPhrasePenaltyFF featureFunction = new ArityPhrasePenaltyFF(featureID, weight, owner, min, max);
+
+    int lhs = -1;
+    int[] sourceRHS = {24, -1, 42, 738};
+    int[] targetRHS = {-1, 7, 8};
+    float[] featureScores = {-2.35f, -1.78f, -0.52f};
+    int arity = 1;
+
+    Rule dummyRule = new BilingualRule(lhs, sourceRHS, targetRHS, featureScores, arity);
+
+    Assert.assertEquals(featureFunction.estimateLogP(dummyRule, -1), ArityPhrasePenaltyFF.ALPHA);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
new file mode 100644
index 0000000..9add469
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.lm.berkeley_lm.LMGrammarBerkeley;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for testing ARPA language model class.
+ * 
+ * @author Lane Schwartz
+ */
+public class ArpaFileTest {
+
+  String arpaFileName;
+
+  Vocabulary vocab;
+
+  @Test
+  public void setup() {
+
+    vocab = new Vocabulary();
+    vocab.id("a");
+    vocab.id("because");
+    vocab.id("boycott");
+    vocab.id("of");
+    vocab.id("parliament");
+    vocab.id("potato");
+    vocab.id("resumption");
+    vocab.id("the");
+
+    try {
+      File file = File.createTempFile("testLM", "arpa");
+      PrintStream out = new PrintStream(file, "UTF-8");
+
+      out.println();
+      out.println("\\data\\");
+      out.println("ngram 1=8");
+      out.println("ngram 2=4");
+      out.println("ngram 3=1");
+      out.println();
+
+      out.println("\\1-grams:");
+      out.println("-1.992672	a	-0.1195484");
+      out.println("-2.713723	because	-0.4665429");
+      out.println("-4.678545	boycott	-0.0902521");
+      out.println("-1.609573	of	-0.1991907");
+      out.println("-3.875917	parliament	-0.1274891");
+      out.println("-9.753210	potato");
+      out.println("-4.678545	resumption	-0.07945678");
+      out.println("-1.712444	the	-0.1606644");
+
+      out.println();
+      out.println("\\2-grams:");
+      out.println("-0.3552987	because of	-0.03083654");
+      out.println("-1.403534	of a");
+      out.println("-0.7507797	of the	-0.05237135");
+      out.println("-0.7266324	resumption of");
+      out.println("-3.936147	the resumption");
+
+      out.println();
+      out.println("\\3-grams:");
+      out.println("-0.6309999	because of the");
+      out.println();
+
+      out.println("\\end\\");
+
+      out.close();
+      this.arpaFileName = file.getAbsolutePath();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to create temporary file: " + e.toString());
+    }
+
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testOrder() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    try {
+      Assert.assertEquals(arpaFile.getOrder(), 3);
+    } catch (FileNotFoundException e) {
+      Assert.fail(e.toString());
+    }
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testIteration() {
+
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
+
+    boolean iterationOccurred = false;
+
+    for (ArpaNgram ngram : arpaFile) {
+
+      iterationOccurred = true;
+
+      int order = ngram.order();
+      //			System.err.println("Order = " + order);
+
+      int count;
+      if (counts.containsKey(order)) {
+        count = counts.get(order) + 1;
+      } else {
+        count = 1;
+      }
+
+      counts.put(order, count);
+
+    }
+
+    Assert.assertTrue(iterationOccurred);
+
+    Assert.assertTrue(counts.containsKey(1));
+    Assert.assertTrue(counts.containsKey(2));
+    Assert.assertTrue(counts.containsKey(3));
+
+    Assert.assertEquals((int) counts.get(1), 8);
+    Assert.assertEquals((int) counts.get(2), 5);
+    Assert.assertEquals((int) counts.get(3), 1);
+
+  }
+
+  @Test(dependsOnMethods = { "setup" })
+  public void testSize() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
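+    // setup() writes 8 unigrams, 5 bigrams, and 1 trigram, i.e. 14 entries in total
+    // (the "ngram 2=4" line in the \data\ header undercounts the bigram section).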
+    Assert.assertEquals(arpaFile.size(), 14);
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration" })
+  public void testChildren() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+    //		System.err.println(lm.getChildren().size());
+    Assert.assertNotSame(lm.getChildren().size(), 0);
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration", "testChildren" })
+  public void testTrie() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+
+    testLm(lm);
+
+  }
+
+  @Test(dependsOnMethods = { "setup", "testIteration", "testChildren" })
+  public void testBerkeley() throws FileNotFoundException {
+
+    LMGrammarBerkeley lm = new LMGrammarBerkeley(vocab, 3, arpaFileName);
+
+    testLm(lm);
+
+  }
+
+  /**
+   * Checks known n-gram probabilities and backoff-based estimates against the given
+   * language model.
+   *
+   * @param lm the language model under test
+   */
+  private void testLm(AbstractLM lm) {
+    // Test unigrams known to be in the language model
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a")), -1.992672, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because")), -2.713723, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("boycott")), -4.678545, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of")), -1.609573, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("parliament")), -3.875917, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato")), -9.753210, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption")), -4.678545, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the")), -1.712444, 0.000001f);
+
+    // Test unigrams known to NOT be in the language model
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("banana")), -JoshuaConfiguration.lm_ceiling_cost, 0.000001f);
+
+    // Test bigrams known to be in the language model
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of")), -0.3552987, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the")), -0.7507797, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("resumption of")), -0.7266324, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the resumption")), -3.936147, 0.000001f);
+
+    // Test trigrams known to be in the language model
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("because of the")), -0.6309999f, 0.000001f);
+
+    // Test bigrams known to NOT be in the language model (but the unigrams are)
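+    // With no matching bigram entry, the expected value backs off to the unigram:
+    // log10 P(w2) + backoff(w1), using the values written in setup().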
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("a boycott")), -4.678545f + -0.1195484f, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of parliament")), -3.875917f + -0.1991907f, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("the potato")), -9.753210f + -0.1606644f, 0.000001f);
+    Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("potato parliament")), -3.875917f + -0.0f, 0.000001f);
+
+    // Test trigrams known to NOT be in the language model (but the bigrams are)
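+    // Here the expected value backs off to the bigram: log10 P(w2 w3) + backoff(w1 w2).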
+    int[] words = vocab.getIDs("because of a");
+    double f = lm.ngramLogProbability(words);
+    Assert.assertEquals(f, -1.403534f + -0.03083654f, 0.000001f);
+    //		//Assert.assertEquals(lm.ngramLogProbability(vocab.getIDs("of the parliament")), -3.875917f + -0.05237135f, 0.000001f);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
index da8218b..f762e31 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -19,6 +19,7 @@
 package org.apache.joshua.decoder.ff.lm;
 
 import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.*;
 
 import org.junit.After;
 import org.junit.Before;
@@ -81,7 +82,7 @@ public class LanguageModelFFTest {
   @Test
   public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
     int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
-    assertNotEquals(startSymbolId, 3);
+    assertThat(startSymbolId, not(equalTo(3)));
     int[] left = {startSymbolId, 3};
     NgramDPState currentState = new NgramDPState(left, new int[left.length]);
     

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 2c4b859..df73136 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -27,7 +27,6 @@ import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
-import org.junit.runners.Parameterized.Parameter;
 import org.junit.runners.Parameterized.Parameters;
 
 import org.apache.joshua.decoder.Decoder;
@@ -38,7 +37,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 /**
  * Replacement for test/lm/berkeley/test.sh regression test
  */
-@RunWith(Parameterized.class)
+@RunWith(value = Parameterized.class)
 public class LMGrammarBerkeleyTest {
 
   private static final String INPUT = "the chat-rooms";
@@ -60,7 +59,7 @@ public class LMGrammarBerkeleyTest {
     decoder.cleanUp();
   }
   
-  @Parameter
+  //TODO @Parameters
   public String lmFile;
   
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java b/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
new file mode 100644
index 0000000..88b2350
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for {@link DeNormalize}, which restores punctuation, hyphens, contractions,
+ * bracket tokens, and capitalization in tokenized output.
+ */
+public class DeNormalizeTest {
+
+  private String tokenized;
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @BeforeMethod
+  protected void setUp() throws Exception {
+    tokenized = "my son 's friend , however , plays a high - risk game .";
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#processSingleLine(java.lang.String)}.
+   */
+  @Test(enabled = true)
+  public void testProcessSingleLine() {
+    tokenized =
+        "my son 's friend , ( dr . -rrb- robotnik , phd , however , wo n't play a high - risk game .";
+    String expected = "My son's friend, (Dr.) robotnik, PhD, however, won't play a high-risk game.";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#processSingleLine(java.lang.String)}.
+   */
+  @Test
+  public void testProcessSingleLine_interspersed() {
+    tokenized = "phd mphil";
+    String expected = "PhD MPhil";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link joshua.decoder.io.DeNormalize#capitalizeLineFirstLetter(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter(tokenized);
+    String expected = "My son 's friend , however , plays a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link joshua.decoder.io.DeNormalize#capitalizeLineFirstLetter(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_empty() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link joshua.decoder.io.DeNormalize#capitalizeLineFirstLetter(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleNumberCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("1");
+    String expected = "1";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link joshua.decoder.io.DeNormalize#capitalizeLineFirstLetter(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleLetterCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("a");
+    String expected = "A";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinPunctuationMarks(java.lang.String)}.
+   */
+  @Test
+  public void testJoinPunctuationMarks() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks(tokenized);
+    String expected = "my son 's friend, however, plays a high - risk game.";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinPunctuationMarks(java.lang.String)}.
+   */
+  @Test
+  public void testJoinPunctuationMarks_empty() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinHyphen(java.lang.String)}.
+   */
+  @Test
+  public void testJoinHyphen() throws Exception {
+    String actual = DeNormalize.joinHyphen(tokenized);
+    String expected = "my son 's friend , however , plays a high-risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinHyphen(java.lang.String)}.
+   */
+  @Test
+  public void testJoinHyphen_empty() throws Exception {
+    String actual = DeNormalize.joinHyphen("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinHyphen(java.lang.String)}.
+   */
+  @Test
+  public void testJoinHyphen_1space_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a - - b");
+    String expected = "a-- b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinHyphen(java.lang.String)}.
+   */
+  @Test
+  public void testJoinHyphen_2spaces_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a -  - b");
+    String expected = "a--b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinContractions(java.lang.String)}.
+   */
+  @Test
+  public void testJoinContractions() throws Exception {
+    tokenized = "my son 's friend , however , wo n't play a high - risk game .";
+    String actual = DeNormalize.joinContractions(tokenized);
+    String expected = "my son's friend , however , won't play a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#joinContractions(java.lang.String)}.
+   */
+  @Test
+  public void testJoinContractions_empty() throws Exception {
+    String actual = DeNormalize.joinContractions("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link joshua.decoder.io.DeNormalize#capitalizeNameTitleAbbrvs(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeNameTitleAbbrvs() throws Exception {
+    String actual, expected;
+    tokenized =
+        "my son 's friend , dr . robotnik , phd , however , wo n't play a high - risk game .";
+    expected =
+        "my son 's friend , Dr . robotnik , PhD , however , wo n't play a high - risk game .";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "mr mrs ms miss dr prof";
+    expected = "Mr Mrs Ms Miss Dr Prof";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#capitalizeI(java.lang.String)}.
+   */
+  @Test
+  public void testCapitalizeI() throws Exception {
+    String expected, actual;
+
+    tokenized = "sam i am";
+    expected = "sam I am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sam iam";
+    expected = "sam iam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sami am";
+    expected = "sami am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "samiam";
+    expected = "samiam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#replaceBracketTokens(java.lang.String)}.
+   */
+  @Test
+  public void testReplaceBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "-lrb- i -rrb-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "-LRB- i -RRB-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.DeNormalize#detokenizeBracketTokens(java.lang.String)}
+   */
+  @Test
+  public void testDetokenizeBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "( i )";
+    expected = "(i)";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "[ i } j";
+    expected = "[i} j";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java b/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
new file mode 100644
index 0000000..5a1c3ab
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+import static org.mockito.Mockito.*;
+
+/**
+ * This class verifies the following behaviors:
+ * 
+ * - A blank input, i.e. "", does not cause a translation to be created.
+ * 
+ * - A non-blank input that is not followed by a newline, e.g. "1", causes a translation to be
+ * created.
+ * 
+ * - An input that contains whitespace or nothing followed by a newline causes a translation to be
+ * created, with "" as the source.
+ */
+public class TranslationRequestTest {
+
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+  @BeforeMethod
+  public void createTranslationRequest() throws Exception {
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @BeforeMethod
+  protected void setUp() throws Exception {
+  }
+
+  /**
+   * @throws java.lang.Exception
+   */
+  @AfterMethod
+  protected void tearDown() throws Exception {
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#TranslationRequest(java.io.InputStream)}.
+   */
+  @Test(enabled = false)
+  public void testTranslationRequest() {
+    fail("Not yet implemented");
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#size()}.
+   */
+  @Test(enabled = true)
+  public void testSize_uponConstruction() {
+    InputStream in = mock(InputStream.class);
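+    // The mocked stream is never read here; before any call to next() the request
+    // stream is expected to report a size of 0.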
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())), joshuaConfiguration);
+    assertEquals(request.size(), 0);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#size()}.
+   * @throws Exception 
+   */
+  @Test(enabled = true)
+  public void testSize_1() throws Exception {
+    byte[] data = "1".getBytes();
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    request.next();
+    assertEquals(request.size(), 1);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#size()}.
+   * @throws Exception 
+   */
+  @Test(enabled = true)
+  public void testSize_newline() throws Exception {
+    byte[] data = "\n".getBytes();
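+    // Per the class comment above, a bare newline still produces one translation
+    // with an empty source side.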
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    request.next();
+    assertEquals(request.size(), 1);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#size()}.
+   * @throws Exception 
+   */
+  @Test(enabled = true)
+  public void testSize_2newlines() throws Exception {
+    byte[] data = "\n\n".getBytes();
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    request.next();
+    request.next();
+    assertEquals(request.size(), 2);
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#next()}.
+   * @throws Exception 
+   */
+  @Test(enabled = true)
+  public void testNext_2Newlines() throws Exception {
+    byte[] data = "\n\n".getBytes();
+    ByteArrayInputStream input = new ByteArrayInputStream(data);
+    TranslationRequestStream request = new TranslationRequestStream(
+        new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+    assertEquals(request.next().source(), "");
+    assertEquals(request.next().source(), "");
+  }
+
+  /**
+   * Test method for {@link joshua.decoder.io.TranslationRequest#remove()}.
+   */
+  @Test(enabled = false)
+  public void testRemove() {
+    fail("Not yet implemented");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java b/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
new file mode 100644
index 0000000..3b2852c
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.testng.annotations.Test;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+public class AlmostTooLongSentenceTest {
+  private JoshuaConfiguration joshuaConfiguration;
+  private String almostTooLongInput;
+  private Sentence sentencePlusTarget;
+
+  @BeforeMethod
+  public void setUp() {
+    joshuaConfiguration = new JoshuaConfiguration();
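+    // Build an input of exactly maxlen characters ("." repeated with no separator),
+    // which the tests below expect the Sentence constructor to accept rather than drop.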
+    almostTooLongInput = concatStrings(".", joshuaConfiguration.maxlen);
+    sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0, joshuaConfiguration);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Return a string consisting of repeatedToken concatenated repeatedTimes times, with no
+   * separator.
+   *
+   * @param repeatedToken the token to repeat
+   * @param repeatedTimes the number of repetitions
+   * @return the concatenated string
+   */
+  private String concatStrings(String repeatedToken, int repeatedTimes) {
+    String result = "";
+    for (int i = 0; i < repeatedTimes; i++) {
+      result += repeatedToken;
+    }
+    return result;
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyNotEmpty() {
+    assertFalse(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyTargetNull() {
+    assertNull(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).target);
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetIsNotEmpty() {
+    assertFalse(this.sentencePlusTarget.isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetMatches() {
+    assertEquals(this.sentencePlusTarget.target, "target side");
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java b/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
new file mode 100644
index 0000000..78483bd
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+public class SentenceTest {
+  private String tooLongInput;
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+  
+  
+
+  @BeforeMethod
+  public void setUp() {
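+    // Build an input of 2 * maxlen space-separated tokens, which exceeds the maximum
+    // sentence length and is expected to be replaced with an empty sentence.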
+    tooLongInput = concatTokens("*", joshuaConfiguration.maxlen * 2);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Return a string consisting of repeatedToken concatenated repeatedTimes times, joined by a
+   * space.
+   *
+   * @param repeatedToken the token to repeat
+   * @param repeatedTimes the number of repetitions
+   * @return the space-joined string
+   */
+  private String concatTokens(String repeatedToken, int repeatedTimes) {
+    String result = "";
+    for (int i = 0; i < repeatedTimes - 1; i++) {
+      result += repeatedToken + " ";
+    }
+    result += repeatedToken;
+    return result;
+  }
+
+  /**
+   * An input sentence that exceeds the maximum length should be replaced with an empty string.
+   */
+  @Test
+  public void testTooManyTokensSourceOnlyEmpty() {
+    assertTrue(new Sentence(this.tooLongInput, 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testTooManyTokensSourceOnlyNotNull() {
+    assertNotNull(new Sentence(this.tooLongInput, 0, joshuaConfiguration));
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetIsEmpty() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.target, "");
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetEmptyString() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertTrue(sentence.isEmpty());
+  }
+
+  @Test
+  public void testClearlyNotTooManyTokens() {
+    // A single short token is comfortably within the maximum sentence length.
+    String input = "token";
+    assertFalse(new Sentence(input, 0, joshuaConfiguration).isEmpty());
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7f824b4e/src/test/java/org/apache/joshua/lattice/ArcTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/lattice/ArcTest.java b/src/test/java/org/apache/joshua/lattice/ArcTest.java
new file mode 100644
index 0000000..6dcf894
--- /dev/null
+++ b/src/test/java/org/apache/joshua/lattice/ArcTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import org.apache.joshua.lattice.Arc;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Arc class.
+ * 
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice_arc" })
+public class ArcTest {
+
+  private final Node<String> head = new Node<String>(1);
+  private final Node<String> tail = new Node<String>(2);
+  private final double cost = Math.PI;
+  private final String label = "pi";
+
+  private Arc<String> arc;
+
+  @Test(dependsOnMethods = { "org.apache.joshua.lattice.NodeTest.constructNode" })
+  //@Test(dependsOnGroups = {"lattice_node" })
+  public void constructArc() {
+
+    arc = new Arc<String>(head, tail, (float)cost, label);
+
+    Assert.assertEquals(arc.getHead(), head);
+    Assert.assertEquals(arc.getTail(), tail);
+    Assert.assertEquals(arc.getCost(), cost);
+    Assert.assertEquals(arc.getLabel(), label);
+
+  }
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getHead() {
+
+    Assert.assertEquals(arc.getHead(), head);
+
+  }
+
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getTail() {
+
+    Assert.assertEquals(arc.getTail(), tail);
+
+  }
+
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getCost() {
+
+    Assert.assertEquals(arc.getCost(), cost);
+
+  }
+
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getLabel() {
+
+    Assert.assertEquals(arc.getLabel(), label);
+
+  }
+}