You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/14 10:43:29 UTC

[8/8] incubator-joshua git commit: Moved regression tests bn-en/packed and bn-en/samt to unit test. Moved the shell script regression tests from bn-en/packed and bn-en/samt to a unit test class. Also cleaned up the corresponding resource directory. Regenerated gold output with "%c %s" format.

Moved regression tests bn-en/packed and bn-en/samt to unit test
Moved the shell script regression tests from bn-en/packed and bn-en/samt to a unit test class. Also cleaned up the corresponding resource directory. Regenerated gold output with "%c %s" format.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e199d851
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e199d851
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e199d851

Branch: refs/heads/master
Commit: e199d8514b52bf6842f19afb2afb51d218c18c3a
Parents: a81e51f
Author: Michael A. Hedderich <mi...@users.noreply.github.com>
Authored: Wed Sep 14 11:50:33 2016 +0200
Committer: Michael A. Hedderich <mi...@users.noreply.github.com>
Committed: Wed Sep 14 11:54:02 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/cky/CKYDecodingTest.java     |  151 ++
 .../joshua/decoder/cky/HieroDecodingTest.java   |  121 --
 src/test/resources/bn-en/packed/joshua.config   |    9 +-
 src/test/resources/bn-en/packed/output.gold     | 1724 +++++++++---------
 .../resources/bn-en/packed/output.scores.gold   |  862 ---------
 src/test/resources/bn-en/packed/reference.en.0  |  100 -
 src/test/resources/bn-en/packed/reference.en.1  |  100 -
 src/test/resources/bn-en/packed/reference.en.2  |  100 -
 src/test/resources/bn-en/packed/reference.en.3  |  100 -
 .../resources/bn-en/packed/reference.en.all     |  400 ----
 src/test/resources/bn-en/packed/test.sh         |   20 -
 src/test/resources/bn-en/samt/joshua.config     |   10 +-
 src/test/resources/bn-en/samt/output.gold       |  862 +++++++++
 src/test/resources/bn-en/samt/output.gold.bleu  |   14 -
 .../resources/bn-en/samt/output.scores.gold     |  862 ---------
 src/test/resources/bn-en/samt/reference.en.0    |  100 -
 src/test/resources/bn-en/samt/reference.en.1    |  100 -
 src/test/resources/bn-en/samt/reference.en.2    |  100 -
 src/test/resources/bn-en/samt/reference.en.3    |  100 -
 src/test/resources/bn-en/samt/test.sh           |   35 -
 20 files changed, 1888 insertions(+), 3982 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e199d851/src/test/java/org/apache/joshua/decoder/cky/CKYDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/CKYDecodingTest.java b/src/test/java/org/apache/joshua/decoder/cky/CKYDecodingTest.java
new file mode 100644
index 0000000..c34ee41
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/CKYDecodingTest.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.testng.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Decodes the bn-en test inputs with several decoder configurations and compares
+ * the decoder output, entry by entry, with the gold file of each configuration.
+ */
+public class CKYDecodingTest {
+
+	/** Separator on which the decoder output of one sentence is split into entries. */
+	private final static String N_BEST_SEPARATOR = "\n";
+
+	/** Directory of the bn-en test resources, relative to the working directory. */
+	private final static String BN_EN_DIR = "src/test/resources/bn-en/";
+
+	private JoshuaConfiguration joshuaConfig;
+	private Decoder decoder;
+
+	/**
+	 * Cleans up the decoder of the finished test. The null check covers tests
+	 * in which no decoder was assigned, e.g. because setting it up failed.
+	 */
+	@AfterMethod
+	public void tearDown() throws Exception {
+		if (decoder != null) {
+			decoder.cleanUp();
+			decoder = null;
+		}
+	}
+
+	@Test
+	public void givenBnEnInput_whenPhraseDecoding_thenScoreAndTranslationCorrect() throws Exception {
+		assertDecodingMatchesGold("hiero/input.bn", "hiero/joshua.config", "hiero/output.gold");
+	}
+
+	@Test
+	public void givenBnEnInput_whenPhraseDecodingWithBerkeleyLM_thenScoreAndTranslationCorrect() throws Exception {
+		assertDecodingMatchesGold("hiero/input.bn", "hiero/joshua-berkeleylm.config", "hiero/output-berkeleylm.gold");
+	}
+
+	@Test
+	public void givenBnEnInput_whenPhraseDecodingWithClassLM_thenScoreAndTranslationCorrect() throws Exception {
+		assertDecodingMatchesGold("hiero/input.bn", "hiero/joshua-classlm.config", "hiero/output-classlm.gold");
+	}
+
+	@Test
+	public void givenBnEnInput_whenPhraseDecodingWithPackedGrammar_thenScoreAndTranslationCorrect() throws Exception {
+		assertDecodingMatchesGold("packed/input.bn", "packed/joshua.config", "packed/output.gold");
+	}
+
+	@Test
+	public void givenBnEnInput_whenPhraseDecodingWithSAMT_thenScoreAndTranslationCorrect() throws Exception {
+		assertDecodingMatchesGold("samt/input.bn", "samt/joshua.config", "samt/output.gold");
+	}
+
+	/**
+	 * Decodes every line of an input file and asserts that the result equals the gold file.
+	 *
+	 * @param input input file below {@code BN_EN_DIR}; each of its lines is decoded separately
+	 * @param config Joshua configuration file below {@code BN_EN_DIR}
+	 * @param gold file below {@code BN_EN_DIR} with the expected decoder output, one entry per line
+	 * @throws Exception if a file cannot be read or the decoder cannot be set up
+	 */
+	private void assertDecodingMatchesGold(String input, String config, String gold) throws Exception {
+		// Given
+		List<String> inputStrings = loadSentencesFromFile(BN_EN_DIR + input);
+
+		// When
+		configureDecoder(BN_EN_DIR + config);
+		List<String> decodedStrings = decodeList(inputStrings);
+
+		// Then
+		List<String> goldStrings = loadSentencesFromFile(BN_EN_DIR + gold);
+		assertEquals(decodedStrings, goldStrings);
+	}
+
+	/**
+	 * Reads all lines of a text file. Files.readAllLines closes the file before it
+	 * returns, whereas a Files.lines stream keeps it open until the stream is closed.
+	 *
+	 * @param pathToFile path of the file to read
+	 * @return the lines of the file, in file order
+	 * @throws IOException if the file cannot be read
+	 */
+	private static List<String> loadSentencesFromFile(String pathToFile) throws IOException {
+		return Files.readAllLines(Paths.get(pathToFile));
+	}
+
+	/**
+	 * Reads the configuration file into a new JoshuaConfiguration and creates the decoder
+	 * from it. NOTE(review): the constructor call runs inside KenLmTestUtil.Guard; see
+	 * that helper for how it treats failures - its behavior is not visible here.
+	 */
+	private void configureDecoder(String pathToConfig) throws Exception {
+		joshuaConfig = new JoshuaConfiguration();
+		joshuaConfig.readConfigFile(pathToConfig);
+		KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+	}
+
+	/**
+	 * @param inputStrings A list of strings that should be decoded
+	 * @return A list of decoded strings. If the decoder produces
+	 * an n-best list, then each translation of the n-best list
+	 * has its own entry in the returned list.
+	 */
+	private List<String> decodeList(List<String> inputStrings) {
+		final List<String> decodedStrings = new ArrayList<>();
+
+		for (String inputString : inputStrings) {
+			final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
+			final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
+			decodedStrings.addAll(Arrays.asList(nBestList));
+		}
+
+		return decodedStrings;
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e199d851/src/test/java/org/apache/joshua/decoder/cky/HieroDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/HieroDecodingTest.java b/src/test/java/org/apache/joshua/decoder/cky/HieroDecodingTest.java
deleted file mode 100644
index 7061d3b..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/HieroDecodingTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.testng.Assert.assertEquals;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class HieroDecodingTest {
-
-	private final static String N_BEST_SEPARATOR = "\n";
-
-	private JoshuaConfiguration joshuaConfig;
-	private Decoder decoder;
-
-	@AfterMethod
-	public void tearDown() throws Exception {
-		decoder.cleanUp();
-		decoder = null;
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecoding_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua.config");
-		List<String> decodedStrings = decodeList(inputStrings);
-
-		// Then
-		List<String> goldStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/output.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithBerkeleyLM_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua-berkeleylm.config");
-		List<String> decodedStrings = decodeList(inputStrings);
-
-		// Then
-		List<String> goldStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/output-berkeleylm.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-
-	@Test
-	public void givenBnEnInput_whenPhraseDecodingWithClassLM_thenScoreAndTranslationCorrect() throws Exception {
-		// Given
-		List<String> inputStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/input.bn");
-
-		// When
-		configureDecoder("src/test/resources/bn-en/hiero/joshua-classlm.config");
-		List<String> decodedStrings = decodeList(inputStrings);
-
-		// Then
-		List<String> goldStrings = loadSentencesFromFile("src/test/resources/bn-en/hiero/output-classlm.gold");
-		assertEquals(decodedStrings, goldStrings);
-	}
-
-	private static List<String> loadSentencesFromFile(String pathToFile) throws IOException {
-		List<String> inputLines = Files.lines(Paths.get(pathToFile)).collect(Collectors.toList());
-		return inputLines;
-	}
-
-	private void configureDecoder(String pathToConfig) throws Exception {
-		joshuaConfig = new JoshuaConfiguration();
-		joshuaConfig.readConfigFile(pathToConfig);
-		KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
-	}
-	
-	/**
-	 * @param inputStrings A list of strings that should be decoded
-	 * @return A list of decoded strings. If the decoder produces
-	 * an n-best list, then each translation of the n-best list
-	 * has its own entry in the returned list.
-	 */
-	private List<String> decodeList(List<String> inputStrings) {
-		final List<String> decodedStrings = new ArrayList<>();
-
-		for (String inputString : inputStrings) {
-			final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
-			final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
-			decodedStrings.addAll(Arrays.asList(nBestList));
-		}
-
-		return decodedStrings;
-	}
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e199d851/src/test/resources/bn-en/packed/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/bn-en/packed/joshua.config b/src/test/resources/bn-en/packed/joshua.config
index 40c612d..f140cc4 100644
--- a/src/test/resources/bn-en/packed/joshua.config
+++ b/src/test/resources/bn-en/packed/joshua.config
@@ -1,7 +1,7 @@
-lm = kenlm 5 false false 100 lm.gz
+lm = kenlm 5 false false 100 src/test/resources/bn-en/packed/lm.gz
 
-tm = thrax pt 12 grammar.packed
-tm = thrax glue -1 grammar.glue
+tm = thrax -owner pt -maxspan 12 -path src/test/resources/bn-en/packed/grammar.packed
+tm = thrax -owner glue -maxspan -1 -path src/test/resources/bn-en/packed/grammar.glue
 
 mark_oovs = false
 
@@ -20,6 +20,9 @@ top_n = 10
 feature-function = OOVPenalty
 feature-function = WordPenalty
 
+# output format
+output-format = "%c %s"
+
 ###### model weights
 #lm order weight
 lm_0 1.3200621467242506