You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 11:17:36 UTC
[4/9] incubator-joshua git commit: moved new unit tests (former
regression tests) to correct location in joshua 7 (joshua-core/src)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/lowercaser/joshua.config b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
new file mode 100644
index 0000000..6f5a46b
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/lowercaser/joshua.config
@@ -0,0 +1,140 @@
+# This file is a template for the Joshua pipeline; variables enclosed
+# in <angle-brackets> are substituted by the pipeline script as
+# appropriate. This file also serves to document Joshua's many
+# parameters.
+
+# These are the grammar file specifications. Joshua supports an
+# arbitrary number of grammar files, each specified on its own line
+# using the following format:
+#
+# tm = TYPE -owner OWNER -maxspan LIMIT -path FILE
+#
+# TYPE is "packed", "thrax", or "samt". The latter denotes the format
+# used in Zollmann and Venugopal's SAMT decoder
+# (http://www.cs.cmu.edu/~zollmann/samt/).
+#
+# OWNER is the "owner" of the rules in the grammar; this is used to
+# determine which set of phrasal features apply to the grammar's
+# rules. Having different owners allows different features to be
+# applied to different grammars, and for grammars to share features
+# across files.
+#
+# LIMIT is the maximum input span permitted for the application of
+# grammar rules found in the grammar file. A value of -1 implies no limit.
+#
+# FILE is the grammar file (or directory when using packed grammars).
+# The file can be compressed with gzip, which is determined by the
+# presence or absence of a ".gz" file extension.
+#
+# By a convention defined by Chiang (2007), the grammars are split
+# into two files: the main translation grammar containing all the
+# learned translation rules, and a glue grammar which supports
+# monotonic concatenation of hierarchical phrases. The glue grammar's
+# main distinction from the regular grammar is that the span limit
+# does not apply to it.
+
+tm = hiero -maxspan 20 -path src/test/resources/decoder/lowercaser/grammar.test -owner pt
+tm = thrax -path src/test/resources/decoder/lowercaser/grammar.glue -maxspan -1 -owner glue
+
+# This symbol is used over unknown words in the source language
+
+default-non-terminal = X
+
+# This is the goal nonterminal, used to determine when a complete
+# parse is found. It should correspond to the root-level rules in the
+# glue grammar.
+
+goal-symbol = GOAL
+
+# Language model config.
+#
+# Multiple language models are supported. For each language model,
+# create one of the following lines:
+#
+# feature-function = LanguageModel -lm_type TYPE -lm_order ORDER -lm_file FILE
+# feature-function = StateMinimizingLanguageModel -lm_order ORDER -lm_file FILE
+#
+# - TYPE is one of "kenlm" or "berkeleylm"
+# - ORDER is the order of the language model (default 5)
+# - FILE is the path to the LM file. This can be binarized if appropriate to the type
+# (e.g., KenLM has a compiled format)
+#
+# A state-minimizing LM collapses left-state. Currently only KenLM supports this.
+#
+# For each LM, add a weight lm_INDEX below, where indexing starts from 0.
+
+
+
+# The suffix _OOV is appended to unknown source-language words if this
+# is set to true.
+
+mark-oovs = false
+
+# The search algorithm: "cky" for hierarchical / phrase-based decoding,
+# "stack" for phrase-based decoding
+search = cky
+
+# The pop-limit for decoding. This determines how many hypotheses are
+# considered over each span of the input.
+
+pop-limit = 100
+
+# How many hypotheses to output
+
+top-n = 1
+
+# Whether those hypotheses should be distinct strings
+
+use-unique-nbest = true
+
+# This is the default format of the output printed to STDOUT. The variables that can be
+# substituted are:
+#
+# %i: the sentence number (0-indexed)
+# %s: the translated sentence
+# %t: the derivation tree
+# %f: the feature string
+# %c: the model cost
+
+output-format = %s
+
+# When printing the trees (%t in 'output-format'), this controls whether the alignments
+# are also printed.
+
+include-align-index = false
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+
+## Model weights #####################################################
+
+# For each language model line listed above, create a weight in the
+# following format: the keyword "lm", a 0-based index, and the weight.
+# lm_INDEX WEIGHT
+
+
+# The phrasal weights correspond to weights stored with each of the
+# grammar rules. The format is
+#
+# tm_OWNER_COLUMN WEIGHT
+#
+# where COLUMN denotes the 0-based order of the parameter in the
+# grammar file and WEIGHT is the corresponding weight. In the future,
+# we plan to add a sparse feature representation which will simplify
+# this.
+
+# The wordpenalty feature counts the number of words in each hypothesis.
+
+
+# This feature counts the number of unknown words in the hypothesis.
+
+
+# This feature weights paths through an input lattice. It is only activated
+# when decoding lattices.
+
+WordPenalty -4.72455379476569
+OOVPenalty 0.7897219562429866
+tm_pt_0 0.3137696816891433
+tm_glue_0 -0.04493059277470993
+
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING b/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
new file mode 100644
index 0000000..90402c6
--- /dev/null
+++ b/joshua-core/src/test/resources/decoder/moses-compat/NEEDS_UPDATING
@@ -0,0 +1 @@
+Needs to be moved to a unit test. The parameter JoshuaConfiguration.moses is handled by JoshuaDecoder. Therefore, the CLI must be made testable before a unit test can be created.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/config.packed
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/config.packed b/joshua-core/src/test/resources/phrase_decoder/config.packed
new file mode 100644
index 0000000..9987b1a
--- /dev/null
+++ b/joshua-core/src/test/resources/phrase_decoder/config.packed
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path rules.packed -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/config b/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
new file mode 100644
index 0000000..2251fe6
--- /dev/null
+++ b/joshua-core/src/test/resources/phrase_decoder/rules.packed/config
@@ -0,0 +1,2 @@
+version = 4
+max-source-len = 3
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding b/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding
new file mode 100644
index 0000000..57e7b75
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/encoding differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features
new file mode 100644
index 0000000..2a77e43
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.features differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source
new file mode 100644
index 0000000..c384c54
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.source differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target
new file mode 100644
index 0000000..8375cf0
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..3e8c294
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/slice_00000.target.lookup differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary b/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary
new file mode 100644
index 0000000..528a970
Binary files /dev/null and b/joshua-core/src/test/resources/phrase_decoder/rules.packed/vocabulary differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java b/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
deleted file mode 100644
index d11d58f..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/BnEnDecodingTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class BnEnDecodingTest {
-
- private JoshuaConfiguration joshuaConfig;
- private Decoder decoder;
-
- @AfterMethod
- public void tearDown() throws Exception {
- if(decoder != null) {
- decoder.cleanUp();
- decoder = null;
- }
- }
-
- @Test
- public void givenBnEnInput_whenPhraseDecoding_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
- // When
- configureDecoder("src/test/resources/bn-en/hiero/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- @Test
- public void givenBnEnInput_whenPhraseDecodingWithBerkeleyLM_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
- // When
- configureDecoder("src/test/resources/bn-en/hiero/joshua-berkeleylm.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output-berkeleylm.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- @Test
- public void givenBnEnInput_whenPhraseDecodingWithClassLM_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/input.bn");
-
- // When
- configureDecoder("src/test/resources/bn-en/hiero/joshua-classlm.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/hiero/output-classlm.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- @Test
- public void givenBnEnInput_whenPhraseDecodingWithPackedGrammar_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/packed/input.bn");
-
- // When
- configureDecoder("src/test/resources/bn-en/packed/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/packed/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- @Test
- public void givenBnEnInput_whenPhraseDecodingWithSAMT_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/bn-en/samt/input.bn");
-
- // When
- configureDecoder("src/test/resources/bn-en/samt/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/bn-en/samt/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- public void configureDecoder(String pathToConfig) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(pathToConfig);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java b/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
deleted file mode 100644
index 53bab7a..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class ConstrainedTest {
-
- private JoshuaConfiguration joshuaConfig;
- private Decoder decoder;
-
- @AfterMethod
- public void tearDown() throws Exception {
- if(decoder != null) {
- decoder.cleanUp();
- decoder = null;
- }
- }
-
- @Test
- public void givenInput_whenConstrainedDecoding_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/constrained/input.bn");
-
- // When
- configureDecoder("src/test/resources/decoder/constrained/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/constrained/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- public void configureDecoder(String pathToConfig) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(pathToConfig);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java b/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
deleted file mode 100644
index cbfb98b..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/DenormalizationTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class DenormalizationTest {
-
- private static final String INPUT = "¿ who you lookin' at , mr. ?";
- private static final String GOLD = "¿Who you lookin' at, Mr.?";
-
- private JoshuaConfiguration joshuaConfig = null;
- private Decoder decoder = null;
-
- @BeforeMethod
- public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.outputFormat = "%S";
- joshuaConfig.mark_oovs = false;
- joshuaConfig.topN = 1;
- decoder = new Decoder(joshuaConfig, "");
- }
-
- @AfterMethod
- public void tearDown() throws Exception {
- decoder.cleanUp();
- decoder = null;
- }
-
- @Test
- public void givenTokenizedInputWithSpecialCharacters_whenDecoding_thenOutputNormalized() {
- String output = translate(INPUT, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD);
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java b/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
deleted file mode 100644
index 4a7010b..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/DoNotCrashTest.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class DoNotCrashTest {
-
- private JoshuaConfiguration joshuaConfig = null;
- private Decoder decoder = null;
-
- @BeforeMethod
- public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- decoder = new Decoder(joshuaConfig, "");
- }
-
- @AfterMethod
- public void tearDown() throws Exception {
- decoder.cleanUp();
- decoder = null;
- }
-
- @Test
- public void givenProblematicInput_whenDecoding_thenNoCrash() throws IOException {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/dont-crash/input");
-
- // When
- decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- // Did not crash
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java b/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
deleted file mode 100644
index fff1550..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class LeftStateTest {
-
- private JoshuaConfiguration joshuaConfig;
- private Decoder decoder;
-
- @AfterMethod
- public void tearDown() throws Exception {
- if(decoder != null) {
- decoder.cleanUp();
- decoder = null;
- }
- }
-
- @Test
- public void givenInput_whenLeftStateDecoding_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/left-state/input.bn");
-
- // When
- configureDecoder("src/test/resources/decoder/left-state/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/left-state/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- public void configureDecoder(String pathToConfig) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(pathToConfig);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java b/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
deleted file mode 100644
index e3f0aac..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class LowercaseTest {
-
- private static final String INPUT_ALL_UPPERCASED = "ELLA";
- private static final String INPUT_CAPITALIZED = "Ella";
-
- private static final String GOLD_UNTRANSLATED_ALL_UPPERCASED = "ELLA";
- private static final String GOLD_LOWERCASED = "she";
- private static final String GOLD_CAPITALIZED = "She";
- private static final String GOLD_ALL_UPPERCASED = "SHE";
-
- private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/lowercaser/joshua.config";
-
- private JoshuaConfiguration joshuaConfig;
- private Decoder decoder;
-
- /**
- * No match in phrase table (only contains ella), therefore passed through
- * untranslated.
- * @throws Exception
- */
- @Test
- public void givenAllUppercasedInput_whenNotLowercasing_thenLowercasedRuleNotFound() throws Exception {
- setUp(false, false, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD_UNTRANSLATED_ALL_UPPERCASED);
- }
-
- /**
- * Match in phrase table (only contains ella), therefore translated.
- * @throws Exception
- */
- @Test
- public void givenAllUppercasedInput_whenLowercasing_thenLowercasedRuleFound() throws Exception {
- setUp(true, false, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD_LOWERCASED);
- }
-
- /**
- * Matches phrase table, not capitalized because projected from first word of sentence
- * @throws Exception
- */
- @Test
- public void givenCapitalizedInput_whenLowercasingAndProjecting_thenLowercased() throws Exception {
- setUp(true, true, false);
- String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD_LOWERCASED);
- }
-
- /**
- * Matches phrase table, capitalized because of output-format
- * @throws Exception
- */
- @Test
- public void givenCapitalizedInput_whenLowercasingAndOutputFormatCapitalization_thenCapitalized() throws Exception {
- setUp(true, true, true);
- String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD_CAPITALIZED);
- }
-
- /**
- * Matches phrase table, all-uppercased because the case is projected from the all-uppercased input
- * @throws Exception
- */
- @Test
- public void givenAllUppercasedInput_whenLowercasingAndProjecting_thenAllUppercased() throws Exception {
- setUp(true, true, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD_ALL_UPPERCASED);
- }
-
- public void setUp(boolean lowercase, boolean projectCase, boolean capitalize) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
- joshuaConfig.lowercase = lowercase;
- joshuaConfig.project_case = projectCase;
- joshuaConfig.outputFormat = capitalize ? "%S" : "%s";
- decoder = new Decoder(joshuaConfig, "");
- }
-
- @AfterMethod
- public void tearDown() throws Exception {
- decoder.cleanUp();
- decoder = null;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java b/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
deleted file mode 100644
index 31a347a..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/NAryTest.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
-import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
-import static org.testng.Assert.assertEquals;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-import java.util.List;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.util.io.KenLmTestUtil;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-public class NAryTest {
- private JoshuaConfiguration joshuaConfig;
- private Decoder decoder;
-
- @AfterMethod
- public void tearDown() throws Exception {
- if (decoder != null) {
- decoder.cleanUp();
- decoder = null;
- }
- }
-
- @Test
- public void givenInput_whenNAryDecoding_thenScoreAndTranslationCorrect() throws Exception {
- // Given
- List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/input.txt");
-
- // When
- configureDecoder("src/test/resources/decoder/n-ary/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
-
- // Then
- List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/n-ary/output.gold");
- assertEquals(decodedStrings, goldStrings);
- }
-
- public void configureDecoder(String pathToConfig) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(pathToConfig);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java b/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
deleted file mode 100644
index b814d08..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/NoGrammarTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import static org.apache.joshua.decoder.cky.TestUtil.translate;
-import static org.testng.Assert.assertEquals;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.BeforeMethod;
-import org.testng.annotations.Test;
-
-public class NoGrammarTest {
-
- private static final String INPUT = "those who hurt others hurt themselves";
- private static final String GOLD = "0 ||| those_OOV who_OOV hurt_OOV others_OOV hurt_OOV themselves_OOV ||| tm_glue_0=6.000 ||| 0.000";
-
- private JoshuaConfiguration joshuaConfig = null;
- private Decoder decoder = null;
-
- @BeforeMethod
- public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.mark_oovs = true;
- decoder = new Decoder(joshuaConfig, "");
- }
-
- @AfterMethod
- public void tearDown() throws Exception {
- decoder.cleanUp();
- decoder = null;
- }
-
- @Test
- public void givenInput_whenDecodingWithoutGrammar_thenOutputAllOOV() {
- String output = translate(INPUT, decoder, joshuaConfig);
- assertEquals(output.trim(), GOLD);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ee7398f7/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java b/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
deleted file mode 100644
index 35800c6..0000000
--- a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.cky;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-
-import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.segment_file.Sentence;
-
-public class TestUtil {
-
- public static final String N_BEST_SEPARATOR = "\n";
-
- /**
- * Loads a text file and returns a list containing one string per line
- * in the file.
- * @param pathToFile
- * @return
- * @throws IOException
- */
- public static List<String> loadStringsFromFile(String pathToFile) throws IOException {
- List<String> inputLines = Files.lines(Paths.get(pathToFile)).collect(Collectors.toList());
- return inputLines;
- }
-
- /**
- *
- * @param inputStrings
- * A list of strings that should be decoded,
- * @param decoder
- * An initialized decoder,
- * @param joshuaConfig
- * The JoshuaConfiguration corresponding to the decoder.
- * @return A list of decoded strings. If the decoder produces a n-best list
- * (separated by N_BEST_SEPARATOR), then each translation of the
- * n-best list has its own entry in the returned list.
- */
- public static List<String> decodeList(List<String> inputStrings, Decoder decoder,
- JoshuaConfiguration joshuaConfig) {
- final List<String> decodedStrings = new ArrayList<>();
-
- for (String inputString : inputStrings) {
- final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
- final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
- decodedStrings.addAll(Arrays.asList(nBestList));
- }
-
- return decodedStrings;
- }
-
- /**
- * Translates the given input string and returns the translation
- * converted into a string.
- * @param input
- * @param decoder
- * @param joshuaConfig
- * @return
- */
- public static String translate(String input, Decoder decoder, JoshuaConfiguration joshuaConfig) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
- return decoder.decode(sentence).toString();
- }
-
-}