You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/17 12:28:01 UTC
[12/14] incubator-joshua git commit: More Unit test conversions
More Unit test conversions
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b8256c71
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b8256c71
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b8256c71
Branch: refs/heads/7_confsystem
Commit: b8256c71e9f707ba6a13e0504a2448e98108e903
Parents: 2e3b10b
Author: Hieber, Felix <fh...@amazon.de>
Authored: Fri Sep 16 14:05:52 2016 +0200
Committer: Hieber, Felix <fh...@amazon.de>
Committed: Fri Sep 16 14:05:52 2016 +0200
----------------------------------------------------------------------
.../resources/phrase_decoder/constrained.config | 50 ++++++-----
.../joshua/decoder/cky/LeftStateTest.java | 2 +-
.../joshua/decoder/cky/LowercaseTest.java | 2 +-
.../org/apache/joshua/decoder/cky/TestUtil.java | 4 +-
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 2 +-
.../kbest_extraction/KBestExtractionTest.java | 15 ++--
.../ConstrainedPhraseDecodingTest.java | 14 +--
.../phrase/decode/PhraseDecodingTest.java | 26 +++---
.../apache/joshua/system/LmOovFeatureTest.java | 4 +-
.../system/MultithreadedTranslationTests.java | 91 ++++++++++++--------
.../joshua/system/StructuredOutputTest.java | 53 +++++-------
.../system/StructuredTranslationTest.java | 8 +-
.../resources/kbest_extraction/joshua.config | 30 ++++---
.../src/test/resources/phrase_decoder/config | 52 ++++++-----
.../resources/phrase_decoder/constrained.config | 50 ++++++-----
15 files changed, 216 insertions(+), 187 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/resources/phrase_decoder/constrained.config
----------------------------------------------------------------------
diff --git a/joshua-core/resources/phrase_decoder/constrained.config b/joshua-core/resources/phrase_decoder/constrained.config
index 4642650..13aace4 100644
--- a/joshua-core/resources/phrase_decoder/constrained.config
+++ b/joshua-core/resources/phrase_decoder/constrained.config
@@ -1,28 +1,34 @@
-tm = moses pt 0 resources/phrase_decoder/rules.1.gz
+grammars = [
+ {class=TextGrammar, owner=pt, span_limit=0, path=src/test/resources/phrase_decoder/rules.1.gz},
+]
-lm = kenlm 5 true false 100 resources/phrase_decoder/lm.1.gz
+mark_oovs = false
+pop_limit = 10
+top_n = 5
-mark-oovs = false
-pop-limit = 10
-top-n = 5
+output_format = %i ||| %s ||| %f ||| %c
-output-format = %i ||| %s ||| %f ||| %c
+include_align_index = true
+reordering_limit = 10
-include-align-index = true
-reordering-limit = 10
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
+feature_functions = [
+ {class=LanguageModel, lm_type=kenlm, lm_order=5, lm_file=src/test/resources/phrase_decoder/lm.1.gz},
+ {class=OOVPenalty},
+ {class=WordPenalty},
+ {class=Distortion},
+ {class=PhrasePenalty, owner=pt},
+]
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-tm_pt_0 0.0370068
-tm_pt_1 0.0495759
-tm_pt_2 0.196742
-tm_pt_3 0.0745423
-lm_0 0.204412452147565
+
+weights = {
+ OOVPenalty = 1
+ Distortion = 0.114849
+ WordPenalty = -0.201544
+ PhrasePenalty = -0.236965
+ pt_0 = 0.0370068
+ pt_1 = 0.0495759
+ pt_2 = 0.196742
+ pt_3 = 0.0745423
+ lm_0 = 0.204412452147565
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
index eb24102..e169910 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
@@ -58,6 +58,6 @@ public class LeftStateTest {
}
public void configureDecoder(File pathToConfig) throws Exception {
- KenLmTestUtil.Guard(() -> decoder = new Decoder(Decoder.createDecoderFlagsFromFile(pathToConfig)));
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(Decoder.getFlagsFromFile(pathToConfig)));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
index 2efe2e7..167718f 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
@@ -101,7 +101,7 @@ public class LowercaseTest {
}
public void setUp(boolean lowercase, boolean projectCase, boolean capitalize) throws Exception {
- Config config = Decoder.createDecoderFlagsFromFile(JOSHUA_CONFIG_PATH)
+ Config config = Decoder.getFlagsFromFile(JOSHUA_CONFIG_PATH)
.withValue("lowercase", ConfigValueFactory.fromAnyRef(lowercase))
.withValue("project_case", ConfigValueFactory.fromAnyRef(projectCase))
.withValue("output_format", ConfigValueFactory.fromAnyRef(capitalize ? "%S" : "%s"));
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
index 349d428..02c79c9 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
@@ -61,7 +61,7 @@ public class TestUtil {
final List<String> decodedStrings = new ArrayList<>();
for (String inputString : inputStrings) {
- final Sentence sentence = new Sentence(inputString, 0, decoder.getDecoderConfig().getFlags());
+ final Sentence sentence = new Sentence(inputString, 0, decoder.getFlags());
final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
decodedStrings.addAll(Arrays.asList(nBestList));
}
@@ -78,7 +78,7 @@ public class TestUtil {
* @return
*/
public static String translate(String input, Decoder decoder) {
- final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags());
+ final Sentence sentence = new Sentence(input, 0, decoder.getFlags());
return decoder.decode(sentence).toString();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 0fbcab6..2c48c64 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -76,7 +76,7 @@ public class LMGrammarBerkeleyTest {
}
private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags());
+ final Sentence sentence = new Sentence(input, 0, decoder.getFlags());
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index 8be422e..b84d4b3 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -27,6 +27,9 @@ import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import org.testng.reporters.Files;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
@@ -44,17 +47,13 @@ public class KBestExtractionTest {
private static final String CONFIG = "src/test/resources/kbest_extraction/joshua.config";
private static final String INPUT = "a b c d e";
private static final Path GOLD_PATH = Paths.get("src/test/resources/kbest_extraction/output.scores.gold");
-
- private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
@BeforeMethod
public void setUp() throws Exception {
- //BROKEN
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(CONFIG);
- joshuaConfig.outputFormat = "%i ||| %s ||| %c";
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+ Config config = Decoder.getFlagsFromFile(new File(CONFIG))
+ .withValue("output_format", ConfigValueFactory.fromAnyRef("%i ||| %s ||| %c"));
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
}
@AfterMethod
@@ -72,7 +71,7 @@ public class KBestExtractionTest {
}
private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ final Sentence sentence = new Sentence(input, 0, decoder.getFlags());
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
index d81c522..10108b1 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -22,12 +22,12 @@ import static com.google.common.base.Charsets.UTF_8;
import static java.nio.file.Files.readAllBytes;
import static org.testng.Assert.assertEquals;
+import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.io.KenLmTestUtil;
@@ -35,6 +35,8 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
/**
* Reimplements the constrained phrase decoding test
*/
@@ -44,15 +46,13 @@ public class ConstrainedPhraseDecodingTest {
private static final String CONFIG = "resources/phrase_decoder/constrained.config";
private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
-
- private JoshuaConfiguration joshuaConfig = null;
+
private Decoder decoder = null;
@BeforeMethod
public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(CONFIG);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+ Config config = Decoder.getFlagsFromFile(new File(CONFIG));
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
}
@AfterMethod
@@ -69,7 +69,7 @@ public class ConstrainedPhraseDecodingTest {
}
private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ final Sentence sentence = new Sentence(input, 0, decoder.getFlags());
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 5d40bf7..e121339 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -20,10 +20,10 @@
import static org.testng.Assert.assertEquals;
+import java.io.File;
import java.io.IOException;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.io.KenLmTestUtil;
@@ -31,6 +31,9 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+
/**
* Reimplements the constrained phrase decoding test
*/
@@ -41,14 +44,12 @@ public class PhraseDecodingTest {
private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| pt_3=-8.555386 pt_2=-7.542729 pt_1=-10.799793 pt_0=-9.702445 lm_0=-19.116861 WordPenalty=-3.040061 PhrasePenalty=5.000000 Distortion=0.000000 ||| -7.496";
private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| Distortion=0.000000 WordPenalty=-3.040061 PhrasePenalty=5.000000 pt_0=-9.702445 pt_1=-10.799793 pt_2=-7.542729 pt_3=-8.555386 lm_0=-19.116861 ||| -7.496";
- private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
@BeforeMethod
public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(CONFIG);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+ Config config = Decoder.getFlagsFromFile(new File(CONFIG));
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(config));
}
@AfterMethod
@@ -59,7 +60,7 @@ public class PhraseDecodingTest {
@Test(enabled = true)
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
- final String translation = decode(INPUT).toString().trim();
+ final String translation = decode(INPUT, "%i ||| %s ||| %f ||| %c").toString().trim();
final String gold = OUTPUT;
assertEquals(translation, gold);
}
@@ -72,24 +73,21 @@ public class PhraseDecodingTest {
*/
@Test(enabled = false)
public void givenInput_whenPhraseDecodingWithAlignments_thenOutputHasAlignments() throws IOException {
- final String translation = decode(INPUT).toString().trim();
+ final String translation = decode(INPUT, "%i ||| %s ||| %f ||| %c").toString().trim();
final String gold = OUTPUT_WITH_ALIGNMENTS;
assertEquals(translation, gold);
}
@Test(enabled = true)
public void givenInput_whenPhraseDecoding_thenInputCanBeRetrieved() throws IOException {
- String outputFormat = joshuaConfig.outputFormat;
- joshuaConfig.outputFormat = "%e";
- final String translation = decode(INPUT).toString().trim();
- joshuaConfig.outputFormat = outputFormat;
+ final String translation = decode(INPUT, "%e").toString().trim();
final String gold = INPUT;
assertEquals(translation, gold);
}
- private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
-// joshuaConfig.setVerbosity(2);
+ private Translation decode(String input, String outputFormat) {
+ final Config flags = decoder.getFlags().withValue("output_format", ConfigValueFactory.fromAnyRef(outputFormat));
+ final Sentence sentence = new Sentence(input, 0, flags);
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java b/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
index df50309..9bf01ad 100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
@@ -40,7 +40,7 @@ public class LmOovFeatureTest {
// expecting 2 lm oovs ('a' & 'full') and 2 grammar OOVs ('chat-rooms' & 'full') and score -198.000
private static final String EXPECTED_FEATURES = "pt_0=-2.000000 lm_0_oov=2.000000 lm_0=-206.718124 glue_0=3.000000 OOVPenalty=-200.000000 | -198.000";
- private static final Config FLAGS = Decoder.createDecoderFlagsFromFile(CONFIG).withValue("output_format", ConfigValueFactory.fromAnyRef("%f | %c"));
+ private static final Config FLAGS = Decoder.getFlagsFromFile(CONFIG).withValue("output_format", ConfigValueFactory.fromAnyRef("%f | %c"));
private Decoder decoder = null;
@BeforeMethod
@@ -62,7 +62,7 @@ public class LmOovFeatureTest {
}
private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags());
+ final Sentence sentence = new Sentence(input, 0, decoder.getFlags());
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java b/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
index 01d3963..12f4766 100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
@@ -18,6 +18,8 @@
*/
package org.apache.joshua.system;
+import static com.typesafe.config.ConfigFactory.parseString;
+import static com.typesafe.config.ConfigValueFactory.fromAnyRef;
import static org.mockito.Mockito.doReturn;
import static org.testng.Assert.assertTrue;
@@ -30,7 +32,6 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.TranslationResponseStream;
import org.apache.joshua.decoder.io.TranslationRequestStream;
@@ -40,6 +41,8 @@ import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
/**
* Integration test for multithreaded Joshua decoder tests. Grammar used is a
* toy packed grammar.
@@ -48,51 +51,66 @@ import org.testng.annotations.Test;
*/
public class MultithreadedTranslationTests {
- private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
private static final String EXCEPTION_MESSAGE = "This exception should properly propagate";
- private int previousLogLevel;
private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000;
@BeforeClass
public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.search_algorithm = "cky";
- joshuaConfig.mark_oovs = false;
- joshuaConfig.pop_limit = 100;
- joshuaConfig.use_unique_nbest = false;
- joshuaConfig.include_align_index = false;
- joshuaConfig.topN = 0;
- joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path src/test/resources/wa_grammar.packed");
- joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path src/test/resources/grammar.glue");
- joshuaConfig.goal_symbol = "[GOAL]";
- joshuaConfig.default_non_terminal = "[X]";
- joshuaConfig.features.add("OOVPenalty");
- joshuaConfig.weights.add("tm_pt_0 1");
- joshuaConfig.weights.add("tm_pt_1 1");
- joshuaConfig.weights.add("tm_pt_2 1");
- joshuaConfig.weights.add("tm_pt_3 1");
- joshuaConfig.weights.add("tm_pt_4 1");
- joshuaConfig.weights.add("tm_pt_5 1");
- joshuaConfig.weights.add("tm_glue_0 1");
- joshuaConfig.weights.add("OOVPenalty 2");
- joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
- // decoders to run at once.
- // Useful to help flush out
- // concurrency errors in
- // underlying
- // data-structures.
- this.decoder = new Decoder(joshuaConfig);
- previousLogLevel = Decoder.VERBOSE;
- Decoder.VERBOSE = 0;
+ Config weights = parseString(
+ "weights = {pt_0=-1, pt_1=-1, pt_2=-1, pt_3=-1, pt_4=-1, pt_5=-1, glue_0=-1, OOVPenalty=2}");
+ Config features = parseString("feature_functions = [{class=OOVPenalty}]");
+ Config grammars = parseString("grammars=[{class=TextGrammar, owner=pt, span_limit=20, path=src/test/resources/wa_grammar},"
+ + "{class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/grammar.glue}]");
+ Config flags = weights
+ .withFallback(features)
+ .withFallback(grammars)
+ .withFallback(Decoder.getDefaultFlags())
+ .withValue("top_n", fromAnyRef(0))
+ .withValue("use_unique_nbest", fromAnyRef(false))
+ .withValue("use_structured_output", fromAnyRef(true))
+ .withValue("num_parallel_decoders", fromAnyRef(500)); // This will enable 500 parallel
+ // decoders to run at once.
+ // Useful to help flush out
+ // concurrency errors in
+ // underlying
+ // data-structures.
+ decoder = new Decoder(flags);
+
+// joshuaConfig = new JoshuaConfiguration();
+// joshuaConfig.search_algorithm = "cky";
+// joshuaConfig.mark_oovs = false;
+// joshuaConfig.pop_limit = 100;
+// joshuaConfig.use_unique_nbest = false;
+// joshuaConfig.include_align_index = false;
+// joshuaConfig.topN = 0;
+// joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path src/test/resources/wa_grammar.packed");
+// joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path src/test/resources/grammar.glue");
+// joshuaConfig.goal_symbol = "[GOAL]";
+// joshuaConfig.default_non_terminal = "[X]";
+// joshuaConfig.features.add("OOVPenalty");
+// joshuaConfig.weights.add("tm_pt_0 1");
+// joshuaConfig.weights.add("tm_pt_1 1");
+// joshuaConfig.weights.add("tm_pt_2 1");
+// joshuaConfig.weights.add("tm_pt_3 1");
+// joshuaConfig.weights.add("tm_pt_4 1");
+// joshuaConfig.weights.add("tm_pt_5 1");
+// joshuaConfig.weights.add("tm_glue_0 1");
+// joshuaConfig.weights.add("OOVPenalty 2");
+// joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
+// // decoders to run at once.
+// // Useful to help flush out
+// // concurrency errors in
+// // underlying
+// // data-structures.
+ this.decoder = new Decoder(flags);
}
@AfterClass
public void tearDown() throws Exception {
this.decoder.cleanUp();
this.decoder = null;
- Decoder.VERBOSE = previousLogLevel;
}
@@ -110,7 +128,6 @@ public class MultithreadedTranslationTests {
// GIVEN
int inputLines = 10000;
- joshuaConfig.use_structured_output = true; // Enabled alignments.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < inputLines; i++) {
sb.append(INPUT + "\n");
@@ -120,7 +137,7 @@ public class MultithreadedTranslationTests {
// engine.
TranslationRequestStream req = new TranslationRequestStream(
new BufferedReader(new InputStreamReader(new ByteArrayInputStream(sb.toString()
- .getBytes(StandardCharsets.UTF_8)))), joshuaConfig);
+ .getBytes(StandardCharsets.UTF_8)))), decoder.getFlags());
ByteArrayOutputStream output = new ByteArrayOutputStream();
@@ -159,7 +176,7 @@ public class MultithreadedTranslationTests {
public void givenDecodeAllCalled_whenRuntimeExceptionThrown_thenPropagate() throws IOException {
// GIVEN
// A spy request stream that will cause an exception to be thrown on a threadpool thread
- TranslationRequestStream spyReq = Mockito.spy(new TranslationRequestStream(null, joshuaConfig));
+ TranslationRequestStream spyReq = Mockito.spy(new TranslationRequestStream(null, decoder.getFlags()));
doReturn(createSentenceSpyWithRuntimeExceptions()).when(spyReq).next();
// WHEN
@@ -172,7 +189,7 @@ public class MultithreadedTranslationTests {
}
private Sentence createSentenceSpyWithRuntimeExceptions() {
- Sentence sent = new Sentence(INPUT, 0, joshuaConfig);
+ Sentence sent = new Sentence(INPUT, 0, decoder.getFlags());
Sentence spy = Mockito.spy(sent);
Mockito.when(spy.target()).thenThrow(new RuntimeException(EXCEPTION_MESSAGE));
return spy;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java b/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
index e4dd435..d5504f5 100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@ -18,11 +18,13 @@
*/
package org.apache.joshua.system;
+import static com.typesafe.config.ConfigFactory.parseString;
+import static com.typesafe.config.ConfigValueFactory.fromAnyRef;
+
import java.util.Arrays;
import java.util.List;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.testng.Assert;
@@ -30,6 +32,8 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
/**
* Integration test for the complete Joshua decoder using a toy grammar that translates
* a bunch of capital letters to lowercase letters. Rules in the test grammar
@@ -40,7 +44,6 @@ import org.testng.annotations.Test;
*/
public class StructuredOutputTest {
- private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
private Translation translation = null;
private static final String input = "A K B1 U Z1 Z2 B2 C";
@@ -55,27 +58,19 @@ public class StructuredOutputTest {
@BeforeMethod
public void setUp() throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.search_algorithm = "cky";
- joshuaConfig.mark_oovs = false;
- joshuaConfig.pop_limit = 100;
- joshuaConfig.use_unique_nbest = false;
- joshuaConfig.include_align_index = false;
- joshuaConfig.topN = 0;
- joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path src/test/resources/wa_grammar");
- joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path src/test/resources/grammar.glue");
- joshuaConfig.goal_symbol = "[GOAL]";
- joshuaConfig.default_non_terminal = "[X]";
- joshuaConfig.features.add("OOVPenalty");
- joshuaConfig.weights.add("pt_0 -1");
- joshuaConfig.weights.add("pt_1 -1");
- joshuaConfig.weights.add("pt_2 -1");
- joshuaConfig.weights.add("pt_3 -1");
- joshuaConfig.weights.add("pt_4 -1");
- joshuaConfig.weights.add("pt_5 -1");
- joshuaConfig.weights.add("glue_0 -1");
- joshuaConfig.weights.add("OOVPenalty 2");
- decoder = new Decoder(joshuaConfig);
+ Config weights = parseString(
+ "weights = {pt_0=-1, pt_1=-1, pt_2=-1, pt_3=-1, pt_4=-1, pt_5=-1, glue_0=-1, OOVPenalty=2}");
+ Config features = parseString("feature_functions = [{class=OOVPenalty}]");
+ Config grammars = parseString("grammars=[{class=TextGrammar, owner=pt, span_limit=20, path=src/test/resources/wa_grammar},"
+ + "{class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/grammar.glue}]");
+ Config flags = weights
+ .withFallback(features)
+ .withFallback(grammars)
+ .withFallback(Decoder.getDefaultFlags())
+ .withValue("top_n", fromAnyRef(0))
+ .withValue("use_unique_nbest", fromAnyRef(false))
+ .withValue("output_format", fromAnyRef("%s | %a"));
+ decoder = new Decoder(flags);
}
@AfterMethod
@@ -85,8 +80,8 @@ public class StructuredOutputTest {
translation = null;
}
- private Translation decode(String input) {
- Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ private Translation decode(String input, Config flags) {
+ Sentence sentence = new Sentence(input, 0, flags);
return decoder.decode(sentence);
}
@@ -94,14 +89,12 @@ public class StructuredOutputTest {
public void test() {
// test standard output
- joshuaConfig.use_structured_output = false;
- joshuaConfig.outputFormat = "%s | %a ";
- translation = decode(input);
+ translation = decode(input,
+ decoder.getFlags().withValue("use_structured_output", fromAnyRef(false)));
Assert.assertEquals(translation.toString().trim(), expectedTranslation + " | " + expectedWordAlignmentString);
// test structured output
- joshuaConfig.use_structured_output = true; // set structured output creation to true
- translation = decode(input);
+ translation = decode(input, decoder.getFlags().withValue("use_structured_output", fromAnyRef(true)));
Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationString(), expectedTranslation);
Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationTokens(), Arrays.asList(expectedTranslation.split("\\s+")));
Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationScore(), expectedScore, 0.00001);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java b/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
index 308d517..5d122e5 100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
@@ -27,7 +27,6 @@ import java.util.List;
import java.util.Map;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.StructuredTranslation;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -35,6 +34,8 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
/**
* Integration test for the complete Joshua decoder using a toy grammar that translates
* a bunch of capital letters to lowercase letters. Rules in the test grammar
@@ -46,7 +47,6 @@ import org.testng.annotations.Test;
*/
public class StructuredTranslationTest {
- private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
private static final String EXPECTED_TRANSLATION = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
@@ -102,8 +102,8 @@ public class StructuredTranslationTest {
decoder = null;
}
- private Translation decode(String input) {
- Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ private Translation decode(String input, Config flags) {
+ Sentence sentence = new Sentence(input, 0, flags);
return decoder.decode(sentence);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/resources/kbest_extraction/joshua.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/kbest_extraction/joshua.config b/joshua-core/src/test/resources/kbest_extraction/joshua.config
index 080fc09..da262ae 100644
--- a/joshua-core/src/test/resources/kbest_extraction/joshua.config
+++ b/joshua-core/src/test/resources/kbest_extraction/joshua.config
@@ -1,27 +1,31 @@
-feature-function = StateMinimizingLanguageModel -lm_type kenlm -lm_order 5 -lm_file src/test/resources/kbest_extraction/lm.gz
+feature_functions = [
+ {class=StateMinimizingLanguageModel, lm_type=kenlm, lm_order=5, lm_file=src/test/resources/kbest_extraction/lm.gz},
+ {class=OOVPenalty}
+]
-tm = thrax -owner pt -maxspan 12 -path src/test/resources/kbest_extraction/grammar
-tm = thrax -owner glue -maxspan -1 -path src/test/resources/kbest_extraction/glue-grammar
+grammars = [
+ {class=TextGrammar, owner=pt, span_limit=12, path=src/test/resources/kbest_extraction/grammar},
+ {class=TextGrammar, owner=glue, span_limit=-1, path=src/test/resources/kbest_extraction/glue-grammar}
+]
mark_oovs=false
-#tm config
default_non_terminal=X
-goalSymbol=GOAL
+goal_symbol=GOAL
#pruning config
-pop-limit=100
+pop_limit=100
#nbest config
use_unique_nbest=true
-top-n = 3126
+top_n = 3126
-#feature_function = WordPenalty
-feature_function = OOVPenalty
# Model Weights ####
-lm_0 1
-pt_0 1
-glue_0 1
-OOVPenalty 10000
+weights = {
+ lm_0 = 1
+ pt_0 = 1
+ glue_0 = 1
+ OOVPenalty = 10000
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/resources/phrase_decoder/config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/config b/joshua-core/src/test/resources/phrase_decoder/config
index 5bf120f..30b6664 100644
--- a/joshua-core/src/test/resources/phrase_decoder/config
+++ b/joshua-core/src/test/resources/phrase_decoder/config
@@ -1,29 +1,35 @@
-tm = moses -owner pt -maxspan 0 -path src/test/resources/phrase_decoder/rules.1.gz -max-source-len 5
-feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file src/test/resources/phrase_decoder/lm.1.gz
+grammars = [
+ {class=TextGrammar, owner=pt, span_limit=0, max_source_len=5, path=src/test/resources/phrase_decoder/rules.1.gz},
+]
-search = stack
+search_algorithm=stack
-mark-oovs = false
-pop-limit = 10
-top-n = 1
+mark_oovs = false
+pop_limit = 10
+top_n = 1
-output-format = %i ||| %s ||| %f ||| %c
+output_format = %i ||| %s ||| %f ||| %c
-include-align-index = true
-reordering-limit = 6
+include_align_index = true
+reordering_limit = 6
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-pt_0 0.0370068
-pt_1 0.0495759
-pt_2 0.196742
-pt_3 0.0745423
-lm_0 0.204412452147565
+feature_functions = [
+ {class=LanguageModel, lm_type=kenlm, lm_order=5, lm_file=src/test/resources/phrase_decoder/lm.1.gz},
+ {class=OOVPenalty},
+ {class=WordPenalty},
+ {class=Distortion},
+ {class=PhrasePenalty, owner=pt},
+]
+
+weights = {
+ OOVPenalty = 1
+ Distortion = 0.114849
+ WordPenalty = -0.201544
+ PhrasePenalty = -0.236965
+ pt_0 = 0.0370068
+ pt_1 = 0.0495759
+ pt_2 = 0.196742
+ pt_3 = 0.0745423
+ lm_0 = 0.204412452147565
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b8256c71/joshua-core/src/test/resources/phrase_decoder/constrained.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/resources/phrase_decoder/constrained.config b/joshua-core/src/test/resources/phrase_decoder/constrained.config
index de75422..13aace4 100644
--- a/joshua-core/src/test/resources/phrase_decoder/constrained.config
+++ b/joshua-core/src/test/resources/phrase_decoder/constrained.config
@@ -1,28 +1,34 @@
-tm = moses pt 0 src/test/resources/phrase_decoder/rules.1.gz
+grammars = [
+ {class=TextGrammar, owner=pt, span_limit=0, path=src/test/resources/phrase_decoder/rules.1.gz},
+]
-lm = kenlm 5 true false 100 src/test/resources/phrase_decoder/lm.1.gz
+mark_oovs = false
+pop_limit = 10
+top_n = 5
-mark-oovs = false
-pop-limit = 10
-top-n = 5
+output_format = %i ||| %s ||| %f ||| %c
-output-format = %i ||| %s ||| %f ||| %c
+include_align_index = true
+reordering_limit = 10
-include-align-index = true
-reordering-limit = 10
-# And these are the feature functions to activate.
-feature-function = OOVPenalty
-feature-function = WordPenalty
-feature-function = Distortion
-feature-function = PhrasePenalty -owner pt
+feature_functions = [
+ {class=LanguageModel, lm_type=kenlm, lm_order=5, lm_file=src/test/resources/phrase_decoder/lm.1.gz},
+ {class=OOVPenalty},
+ {class=WordPenalty},
+ {class=Distortion},
+ {class=PhrasePenalty, owner=pt},
+]
-OOVPenalty 1.0
-Distortion 0.114849
-WordPenalty -0.201544
-PhrasePenalty -0.236965
-pt_0 0.0370068
-pt_1 0.0495759
-pt_2 0.196742
-pt_3 0.0745423
-lm_0 0.204412452147565
+
+weights = {
+ OOVPenalty = 1
+ Distortion = 0.114849
+ WordPenalty = -0.201544
+ PhrasePenalty = -0.236965
+ pt_0 = 0.0370068
+ pt_1 = 0.0495759
+ pt_2 = 0.196742
+ pt_3 = 0.0745423
+ lm_0 = 0.204412452147565
+}
\ No newline at end of file