You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 11:17:36 UTC

[4/9] incubator-joshua git commit: moved new unit tests (former regression tests) to correct location in joshua 7 (joshua-core/src)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/lowercaser/joshua.config b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
new file mode 100644
index 0000000..6f5a46b
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
@@ -0,0 +1,140 @@
+# This file is a template for the Joshua pipeline; variables enclosed
+# in <angle-brackets> are substituted by the pipeline script as
+# appropriate.  This file also serves to document Joshua's many
+# parameters.
+
+# These are the grammar file specifications.  Joshua supports an
+# arbitrary number of grammar files, each specified on its own line
+# using the following format:
+#
+#   tm = TYPE OWNER LIMIT FILE
+# 
+# TYPE is "packed", "thrax", or "samt".  The latter denotes the format
+# used in Zollmann and Venugopal's SAMT decoder
+# (http://www.cs.cmu.edu/~zollmann/samt/).
+# 
+# OWNER is the "owner" of the rules in the grammar; this is used to
+# determine which set of phrasal features apply to the grammar's
+# rules.  Having different owners allows different features to be
+# applied to different grammars, and for grammars to share features
+# across files.
+#
+# LIMIT is the maximum input span permitted for the application of
+# grammar rules found in the grammar file.  A value of -1 implies no limit.
+#
+# FILE is the grammar file (or directory when using packed grammars).
+# The file can be compressed with gzip, which is determined by the
+# presence or absence of a ".gz" file extension.
+#
+# By a convention defined by Chiang (2007), the grammars are split
+# into two files: the main translation grammar containing all the
+# learned translation rules, and a glue grammar which supports
+# monotonic concatenation of hierarchical phrases. The glue grammar's
+# main distinction from the regular grammar is that the span limit
+# does not apply to it.  
+
+tm = hiero -maxspan 20 -path src/test/resources/decoder/lowercaser/grammar.test -owner pt
+tm = thrax -path src/test/resources/decoder/lowercaser/grammar.glue -maxspan -1 -owner glue
+
+# This symbol is used over unknown words in the source language
+
+default-non-terminal = X
+
+# This is the goal nonterminal, used to determine when a complete
+# parse is found.  It should correspond to the root-level rules in the
+# glue grammar.
+
+goal-symbol = GOAL
+
+# Language model config.
+#
+# Multiple language models are supported.  For each language model,
+# create one of the following lines:
+#
+# feature-function = LanguageModel -lm_type TYPE -lm_order ORDER -lm_file FILE
+# feature-function = StateMinimizingLanguageModel -lm_order ORDER -lm_file FILE
+#
+# - TYPE is one of "kenlm" or "berkeleylm"
+# - ORDER is the order of the language model (default 5)
+# - FILE is the path to the LM file. This can be binarized if appropriate to the type
+#   (e.g., KenLM has a compiled format)
+#
+# A state-minimizing LM collapses left-state. Currently only KenLM supports this.
+#
+# For each LM, add a weight lm_INDEX below, where indexing starts from 0.
+
+
+
+# The suffix _OOV is appended to unknown source-language words if this
+# is set to true.
+
+mark-oovs = false
+
+# The search algorithm: "cky" for hierarchical / phrase-based decoding, 
+# "stack" for phrase-based decoding
+search = cky
+
+# The pop-limit for decoding.  This determines how many hypotheses are
+# considered over each span of the input.
+
+pop-limit = 100
+
+# How many hypotheses to output
+
+top-n = 1
+
+# Whether those hypotheses should be distinct strings
+
+use-unique-nbest = true
+
+# This is the default format of the output printed to STDOUT.  The variables that can be
+# substituted are:
+#
+# %i: the sentence number (0-indexed)
+# %s: the translated sentence
+# %t: the derivation tree
+# %f: the feature string
+# %c: the model cost
+
+output-format = %s
+
+# When printing the trees (%t in 'output-format'), this controls whether the alignments
+# are also printed.
+
+include-align-index = false
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+
+## Model weights #####################################################
+
+# For each language model line listed above, create a weight in the
+# following format: the keyword "lm", a 0-based index, and the weight.
+# lm_INDEX WEIGHT
+
+
+# The phrasal weights correspond to weights stored with each of the
+# grammar rules.  The format is
+#
+#   tm_OWNER_COLUMN WEIGHT
+#
+# where COLUMN denotes the 0-based order of the parameter in the
+# grammar file and WEIGHT is the corresponding weight.  In the future,
+# we plan to add a sparse feature representation which will simplify
+# this.
+
+# The wordpenalty feature counts the number of words in each hypothesis.
+
+
+# This feature counts the number of unknown words in the hypothesis.
+
+
+# This feature weights paths through an input lattice.  It is only activated
+# when decoding lattices.
+
+WordPenalty -4.72455379476569
+OOVPenalty 0.7897219562429866
+tm_pt_0 0.3137696816891433
+tm_glue_0 -0.04493059277470993
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING b/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
new file mode 100644
index 0000000..90402c6
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
@@ -0,0 +1 @@
+Needs to be moved to a unit test. The parameter JoshuaConfiguration.moses is handled by JoshuaDecoder. Therefore, the CLI must be made testable before a unit test can be created.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/config.packed
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/config.packed b/joshua-core/src/test/resources/phrase_decoder/config.packed
new file mode 100644
index 0000000..9987b1a
--- /dev/null
+++ b/joshua-core/src/test/resources/phrase_decoder/config.packed
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path rules.packed -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/config b/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
new file mode 100644
index 0000000..2251fe6
--- /dev/null
+++ b/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
@@ -0,0 +1,2 @@
+version = 4
+max-source-len = 3

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding b/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding
new file mode 100644
index 0000000..57e7b75
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features
new file mode 100644
index 0000000..2a77e43
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source
new file mode 100644
index 0000000..c384c54
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target
new file mode 100644
index 0000000..8375cf0
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..3e8c294
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary b/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary
new file mode 100644
index 0000000..528a970
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java b/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
deleted file mode 100644
index d11d58f..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class BnEnDecodingTest {
-
-	private JoshuaConfiguration joshuaConfig;
-	private Decoder decoder;
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		if(decoder != null) {
-			decoder.cleanUp();
-			decoder = null;
-		}
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecoding_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithBerkeleyLM_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua-berkeleylm.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output-berkeleylm.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithClassLM_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua-classlm.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output-classlm.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-	
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithPackedGrammar_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/packed/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/packed/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/packed/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-	
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithSAMT_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/samt/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/samt/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/samt/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-	
-	public void configureDecoder(String pathToConfig) throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.readConfigFile(pathToConfig);
-		KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java b/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
deleted file mode 100644
index 53bab7a..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class ConstrainedTest {
-
-	private JoshuaConfiguration joshuaConfig;
-	private Decoder decoder;
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		if(decoder != null) {
-			decoder.cleanUp();
-			decoder = null;
-		}
-	}
-
-	@Test
-	public void givenInput_whenConstrainedDecoding_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/constrained/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/decoder/constrained/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/constrained/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-	
-	public void configureDecoder(String pathToConfig) throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.readConfigFile(pathToConfig);
-		KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
-	}
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java b/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
deleted file mode 100644
index cbfb98b..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class DenormalizationTest {
-
-	private static final String INPUT = "� who you lookin' at , mr. ?";
-	private static final String GOLD = "�Who you lookin' at, Mr.?";
-	
-	private JoshuaConfiguration joshuaConfig = null;
-	private Decoder decoder = null;
-	
-	@BeforeMethod
-	public void setUp() throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.outputFormat = "%S";
-		joshuaConfig.mark_oovs = false;
-		joshuaConfig.topN = 1;
-		decoder = new Decoder(joshuaConfig, "");
-	}
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		decoder.cleanUp();
-		decoder = null;
-	}
-	
-	@Test
-	public void givenTokenizedInputWithSpecialCharacters_whenDecoding_thenOutputNormalized() {
-		String output = translate(INPUT, decoder, joshuaConfig);
-		assertEquals(output.trim(), GOLD);
-	}	
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java b/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
deleted file mode 100644
index 4a7010b..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class DoNotCrashTest {
-
-	private JoshuaConfiguration joshuaConfig = null;
-	private Decoder decoder = null;
-
-	@BeforeMethod
-	public void setUp() throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		decoder = new Decoder(joshuaConfig, "");
-	}
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		decoder.cleanUp();
-		decoder = null;
-	}
-
-	@Test
-	public void givenProblematicInput_whenDecoding_thenNoCrash() throws IOException {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/dont-crash/input");
-		
-		// When
-		decodeList(inputStrings, decoder, joshuaConfig);
-		
-		// Then
-		// Did not crash
-	}
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java b/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
deleted file mode 100644
index fff1550..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class LeftStateTest {
-
-	private JoshuaConfiguration joshuaConfig;
-	private Decoder decoder;
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		if(decoder != null) {
-			decoder.cleanUp();
-			decoder = null;
-		}
-	}
-
-	@Test
-	public void givenInput_whenLeftStateDecoding_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/left-state/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/decoder/left-state/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-		// Then
-		List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/left-state/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-	
-	public void configureDecoder(String pathToConfig) throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.readConfigFile(pathToConfig);
-		KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java b/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
deleted file mode 100644
index e3f0aac..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class LowercaseTest {
-
-  private static final String INPUT_ALL_UPPERCASED = "ELLA";
-  private static final String INPUT_CAPITALIZED = "Ella";
-
-  private static final String GOLD_UNTRANSLATED_ALL_UPPERCASED = "ELLA";
-  private static final String GOLD_LOWERCASED = "she";
-  private static final String GOLD_CAPITALIZED = "She";
-  private static final String GOLD_ALL_UPPERCASED = "SHE";
-  
-  private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/lowercaser/joshua.config";
-
-  private JoshuaConfiguration joshuaConfig;
-  private Decoder decoder;
-
-  /**
-   * No match in phrase table (only contains ella), therefore passed through
-   * untranslated.
-   * @throws Exception 
-   */
-  @Test
-  public void givenAllUppercasedInput_whenNotLowercasing_thenLowercasedRuleNotFound() throws Exception {
-    setUp(false, false, false);
-    String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_UNTRANSLATED_ALL_UPPERCASED);
-  }
-  
-  /**
-   * Match in phrase table (only contains ella), therefore translated.
-   * @throws Exception
-   */
-  @Test
-  public void givenAllUppercasedInput_whenLowercasing_thenLowercasedRuleFound() throws Exception {
-    setUp(true, false, false);
-    String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_LOWERCASED);
-  }
-  
-  /**
-   * Matches phrase table, not capitalized because projected from first word of sentence
-   * @throws Exception
-   */
-  @Test
-  public void givenCapitalizedInput_whenLowercasingAndProjecting_thenLowercased() throws Exception {
-    setUp(true, true, false);
-    String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_LOWERCASED);
-  }
-  
-  /**
-   * Matches phrase table, capitalized because of output-format
-   * @throws Exception
-   */
-  @Test
-  public void givenCapitalizedInput_whenLowercasingAndOutputFormatCapitalization_thenCapitalized() throws Exception {
-    setUp(true, true, true);
-    String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_CAPITALIZED);
-  }
-  
-  /**
-   * Matches phrase table, capitalized because of output-format
-   * @throws Exception
-   */
-  @Test
-  public void givenAllUppercasedInput_whenLowercasingAndProjecting_thenAllUppercased() throws Exception {
-    setUp(true, true, false);
-    String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
-    assertEquals(output.trim(), GOLD_ALL_UPPERCASED);
-  }
-
-  public void setUp(boolean lowercase, boolean projectCase, boolean capitalize) throws Exception {
-    joshuaConfig = new JoshuaConfiguration();
-    joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
-    joshuaConfig.lowercase = lowercase;
-    joshuaConfig.project_case = projectCase;
-    joshuaConfig.outputFormat = capitalize ? "%S" : "%s";
-    decoder = new Decoder(joshuaConfig, "");
-  }
-  
-  @AfterMethod
-  public void tearDown() throws Exception {
-    decoder.cleanUp();
-    decoder = null;
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java b/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
deleted file mode 100644
index 31a347a..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class NAryTest {
-  private JoshuaConfiguration joshuaConfig;
-  private Decoder decoder;
-
-  @AfterMethod
-  public void tearDown() throws Exception {
-    if (decoder != null) {
-      decoder.cleanUp();
-      decoder = null;
-    }
-  }
-
-  @Test
-  public void givenInput_whenNAryDecoding_thenScoreAndTranslationCorrect() throws Exception {
-    // Given
-    List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/input.txt");
-
-    // When
-    configureDecoder("src/test/resources/decoder/n-ary/joshua.config");
-    List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
-    // Then
-    List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/output.gold");
-    assertEquals(decodedStrings, goldStrings);
-  }
-
-  public void configureDecoder(String pathToConfig) throws Exception {
-    joshuaConfig = new JoshuaConfiguration();
-    joshuaConfig.readConfigFile(pathToConfig);
-    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java b/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
deleted file mode 100644
index b814d08..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class NoGrammarTest {
-
-	private static final String INPUT = "those who hurt others hurt themselves";
-	private static final String GOLD = "0 ||| those_OOV who_OOV hurt_OOV others_OOV hurt_OOV themselves_OOV ||| tm_glue_0=6.000 ||| 0.000";
-	
-	private JoshuaConfiguration joshuaConfig = null;
-	private Decoder decoder = null;
-	
-	@BeforeMethod
-	public void setUp() throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.mark_oovs = true;
-		decoder = new Decoder(joshuaConfig, "");
-	}
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		decoder.cleanUp();
-		decoder = null;
-	}
-	
-	@Test
-	public void givenInput_whenDecodingWithoutGrammar_thenOutputAllOOV() {
-		String output = translate(INPUT, decoder, joshuaConfig);
-		assertEquals(output.trim(), GOLD);
-	}	
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java b/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
deleted file mode 100644
index 35800c6..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.segment_file.Sentence;
-
-public class TestUtil {
-
-	public static final String N_BEST_SEPARATOR = "\n";
-
-	/**
-	 * Loads a text file and returns a list containing one string per line
-	 * in the file.
-	 * @param pathToFile
-	 * @return
-	 * @throws IOException
-	 */
-	public static List<String> loadStringsFromFile(String pathToFile) throws IOException {
-		List<String> inputLines = Files.lines(Paths.get(pathToFile)).collect(Collectors.toList());
-		return inputLines;
-	}
-
-	/**
-	 * 
-	 * @param inputStrings
-	 *            A list of strings that should be decoded,
-	 * @param decoder
-	 *            An initialized decoder,
-	 * @param joshuaConfig
-	 *            The JoshuaConfiguration corresponding to the decoder.
-	 * @return A list of decoded strings. If the decoder produces a n-best list
-	 *         (separated by N_BEST_SEPARATOR), then each translation of the
-	 *         n-best list has its own entry in the returned list.
-	 */
-	public static List<String> decodeList(List<String> inputStrings, Decoder decoder,
-			JoshuaConfiguration joshuaConfig) {
-		final List<String> decodedStrings = new ArrayList<>();
-
-		for (String inputString : inputStrings) {
-			final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
-			final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
-			decodedStrings.addAll(Arrays.asList(nBestList));
-		}
-
-		return decodedStrings;
-	}
-	
-	/**
-	 * Translates the given input string and returns the translation
-	 * converted into a string.
-	 * @param input
-	 * @param decoder
-	 * @param joshuaConfig
-	 * @return
-	 */
-	public static String translate(String input, Decoder decoder, JoshuaConfiguration joshuaConfig) {
-	    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
-	    return decoder.decode(sentence).toString();
-	}
-
-}