You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/17 12:27:59 UTC
[10/14] incubator-joshua git commit: Fixed Unit tests for new configuration system
Fixed Unit tests for new configuration system
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/26dcdb67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/26dcdb67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/26dcdb67
Branch: refs/heads/7_confsystem
Commit: 26dcdb67c9d73af10725b638605d82d06d31b9b6
Parents: 7e96f73
Author: Hieber, Felix <fh...@amazon.de>
Authored: Thu Sep 15 13:22:33 2016 +0200
Committer: Hieber, Felix <fh...@amazon.de>
Committed: Thu Sep 15 19:10:25 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/joshua/decoder/Decoder.java | 22 +
.../apache/joshua/decoder/JoshuaDecoder.java | 17 +-
.../joshua/decoder/cky/ConstrainedTest.java | 1 -
.../joshua/decoder/cky/LeftStateTest.java | 14 +-
.../joshua/decoder/cky/LowercaseTest.java | 30 +-
.../org/apache/joshua/decoder/cky/TestUtil.java | 10 +-
.../decoder/ff/lm/LanguageModelFFTest.java | 18 +-
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 36 +-
.../class_lm/ClassBasedLanguageModelTest.java | 21 +-
.../io/TranslationRequestStreamTest.java | 16 +-
.../kbest_extraction/KBestExtractionTest.java | 6 +-
.../segment_file/AlmostTooLongSentenceTest.java | 21 +-
.../decoder/segment_file/SentenceTest.java | 27 +-
.../org/apache/joshua/lattice/LatticeTest.java | 8 +-
.../org/apache/joshua/packed/Benchmark.java | 132 -
.../org/apache/joshua/packed/CountRules.java | 110 -
.../org/apache/joshua/packed/PrintRules.java | 199 -
.../test/java/org/apache/joshua/packed/README | 6 -
.../org/apache/joshua/packed/VocabTest.java | 58 -
.../java/org/apache/joshua/packed/packer.config | 6 -
.../java/org/apache/joshua/packed/small_grammar | 20000 -----------------
.../test/java/org/apache/joshua/packed/test.sh | 20 -
.../apache/joshua/system/LmOovFeatureTest.java | 18 +-
.../resources/decoder/left-state/joshua.config | 66 +-
.../resources/decoder/left-state/output.gold | 42 +-
.../resources/decoder/lowercaser/joshua.config | 155 +-
.../src/test/resources/lm_oov/joshua.config | 28 +-
27 files changed, 241 insertions(+), 20846 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
index 623bffc..1b21bb5 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -23,6 +23,7 @@ import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
import static org.apache.joshua.decoder.ff.tm.hash_based.TextGrammarFactory.createCustomGrammar;
import static org.apache.joshua.decoder.ff.tm.hash_based.TextGrammarFactory.createGlueTextGrammar;
import static org.apache.joshua.util.Constants.spaceSeparator;
+import static org.apache.joshua.util.FormatUtils.ensureNonTerminalBrackets;
import java.io.File;
import java.io.IOException;
@@ -63,6 +64,7 @@ import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigParseOptions;
import com.typesafe.config.ConfigValue;
+import com.typesafe.config.ConfigValueFactory;
/**
* This class handles decoder initialization and the complication introduced by multithreading.
@@ -129,6 +131,24 @@ public class Decoder {
}
/**
+ * Returns a fully-specified decoder flags {@link Config} from the given
+ * Config. This is the preferable way to include default configuration and ensures
+ * certain format correctness at runtime.
+ */
+ public static Config createDecoderFlags(final Config userFlags) {
+ final Config defaultFlags = Decoder.getDefaultFlags();
+ final Config allFlags = userFlags.resolveWith(defaultFlags).withFallback(defaultFlags);
+ return allFlags
+ .withValue("default_non_terminal", ConfigValueFactory.fromAnyRef(ensureNonTerminalBrackets(allFlags.getString("default_non_terminal"))))
+ .withValue("goal_symbol", ConfigValueFactory.fromAnyRef(ensureNonTerminalBrackets(allFlags.getString("goal_symbol"))));
+ }
+
+ public static Config createDecoderFlagsFromFile(final File fileName) {
+ final ConfigParseOptions options = ConfigParseOptions.defaults().setAllowMissing(false);
+ return createDecoderFlags(ConfigFactory.parseFile(fileName, options));
+ }
+
+ /**
* Returns the DecoderConfig
*/
public DecoderConfig getDecoderConfig() {
@@ -256,6 +276,8 @@ public class Decoder {
private DecoderConfig initialize(final Config config) {
LOG.info("Initializing decoder ...");
+ LOG.info("Default non-terminal: {}", config.getString("default_non_terminal"));
+ LOG.info("Goal symbol: {}", config.getString("goal_symbol"));
long initTime = System.currentTimeMillis();
/*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index d00bc4d..1c90924 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -47,7 +47,6 @@ import com.google.common.base.Throwables;
import com.sun.net.httpserver.HttpServer;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
-import com.typesafe.config.ConfigParseOptions;
import com.typesafe.config.ConfigRenderOptions;
/**
@@ -78,18 +77,12 @@ public class JoshuaDecoder {
* Returns the flags composed of default config, given config, and commandline overrides.
*/
private Config getFlags() {
- final ConfigParseOptions options = ConfigParseOptions.defaults().setAllowMissing(false);
- final Config defaultConfig = Decoder.getDefaultFlags();
- Config givenConfig = ConfigFactory.empty();
- if (configFile != null) {
- givenConfig = ConfigFactory.parseFile(configFile, options).resolveWith(defaultConfig);
- LOG.info("Config: {}", configFile.toString());
+ final Config commandLineOverrides = ConfigFactory.parseMap(overrides, "CmdLine overrides");
+ if (configFile == null) {
+ return commandLineOverrides.withFallback(Decoder.getDefaultFlags()).resolve();
+ } else {
+ return commandLineOverrides.withFallback(Decoder.createDecoderFlagsFromFile(configFile)).resolve();
}
- final Config config = ConfigFactory.parseMap(overrides, "CmdLine overrides")
- .resolve()
- .withFallback(givenConfig)
- .withFallback(defaultConfig);
- return config;
}
private static void printFlags(Config flags) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
index c8d2304..7a2a7b4 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/ConstrainedTest.java
@@ -25,7 +25,6 @@ import static org.testng.Assert.assertEquals;
import java.util.List;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.util.io.KenLmTestUtil;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
index 602808c..eb24102 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LeftStateTest.java
@@ -22,17 +22,16 @@ import static org.apache.joshua.decoder.cky.TestUtil.decodeList;
import static org.apache.joshua.decoder.cky.TestUtil.loadStringsFromFile;
import static org.testng.Assert.assertEquals;
+import java.io.File;
import java.util.List;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.util.io.KenLmTestUtil;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
public class LeftStateTest {
- private JoshuaConfiguration joshuaConfig;
private Decoder decoder;
@AfterMethod
@@ -49,17 +48,16 @@ public class LeftStateTest {
List<String> inputStrings = loadStringsFromFile("src/test/resources/decoder/left-state/input.bn");
// When
- configureDecoder("src/test/resources/decoder/left-state/joshua.config");
- List<String> decodedStrings = decodeList(inputStrings, decoder, joshuaConfig);
+ configureDecoder(new File("src/test/resources/decoder/left-state/joshua.config"));
+ List<String> decodedStrings = decodeList(inputStrings, decoder);
// Then
List<String> goldStrings = loadStringsFromFile("src/test/resources/decoder/left-state/output.gold");
+
assertEquals(decodedStrings, goldStrings);
}
- public void configureDecoder(String pathToConfig) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(pathToConfig);
- KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig));
+ public void configureDecoder(File pathToConfig) throws Exception {
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(Decoder.createDecoderFlagsFromFile(pathToConfig)));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
index 2d8cb49..2efe2e7 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/LowercaseTest.java
@@ -21,11 +21,15 @@ package org.apache.joshua.decoder.cky;
import static org.apache.joshua.decoder.cky.TestUtil.translate;
import static org.testng.Assert.assertEquals;
+import java.io.File;
+
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigValueFactory;
+
public class LowercaseTest {
private static final String INPUT_ALL_UPPERCASED = "ELLA";
@@ -36,9 +40,8 @@ public class LowercaseTest {
private static final String GOLD_CAPITALIZED = "She";
private static final String GOLD_ALL_UPPERCASED = "SHE";
- private static final String JOSHUA_CONFIG_PATH = "src/test/resources/decoder/lowercaser/joshua.config";
+ private static final File JOSHUA_CONFIG_PATH = new File("src/test/resources/decoder/lowercaser/joshua.config");
- private JoshuaConfiguration joshuaConfig;
private Decoder decoder;
/**
@@ -49,7 +52,7 @@ public class LowercaseTest {
@Test
public void givenAllUppercasedInput_whenNotLowercasing_thenLowercasedRuleNotFound() throws Exception {
setUp(false, false, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
+ String output = translate(INPUT_ALL_UPPERCASED, decoder);
assertEquals(output.trim(), GOLD_UNTRANSLATED_ALL_UPPERCASED);
}
@@ -60,7 +63,7 @@ public class LowercaseTest {
@Test
public void givenAllUppercasedInput_whenLowercasing_thenLowercasedRuleFound() throws Exception {
setUp(true, false, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
+ String output = translate(INPUT_ALL_UPPERCASED, decoder);
assertEquals(output.trim(), GOLD_LOWERCASED);
}
@@ -71,7 +74,7 @@ public class LowercaseTest {
@Test
public void givenCapitalizedInput_whenLowercasingAndProjecting_thenLowercased() throws Exception {
setUp(true, true, false);
- String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
+ String output = translate(INPUT_CAPITALIZED, decoder);
assertEquals(output.trim(), GOLD_LOWERCASED);
}
@@ -82,7 +85,7 @@ public class LowercaseTest {
@Test
public void givenCapitalizedInput_whenLowercasingAndOutputFormatCapitalization_thenCapitalized() throws Exception {
setUp(true, true, true);
- String output = translate(INPUT_CAPITALIZED, decoder, joshuaConfig);
+ String output = translate(INPUT_CAPITALIZED, decoder);
assertEquals(output.trim(), GOLD_CAPITALIZED);
}
@@ -93,17 +96,16 @@ public class LowercaseTest {
@Test
public void givenAllUppercasedInput_whenLowercasingAndProjecting_thenAllUppercased() throws Exception {
setUp(true, true, false);
- String output = translate(INPUT_ALL_UPPERCASED, decoder, joshuaConfig);
+ String output = translate(INPUT_ALL_UPPERCASED, decoder);
assertEquals(output.trim(), GOLD_ALL_UPPERCASED);
}
public void setUp(boolean lowercase, boolean projectCase, boolean capitalize) throws Exception {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.readConfigFile(JOSHUA_CONFIG_PATH);
- joshuaConfig.lowercase = lowercase;
- joshuaConfig.project_case = projectCase;
- joshuaConfig.outputFormat = capitalize ? "%S" : "%s";
- decoder = new Decoder(joshuaConfig);
+ Config config = Decoder.createDecoderFlagsFromFile(JOSHUA_CONFIG_PATH)
+ .withValue("lowercase", ConfigValueFactory.fromAnyRef(lowercase))
+ .withValue("project_case", ConfigValueFactory.fromAnyRef(projectCase))
+ .withValue("output_format", ConfigValueFactory.fromAnyRef(capitalize ? "%S" : "%s"));
+ decoder = new Decoder(config);
}
@AfterMethod
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
index 35800c6..349d428 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
@@ -27,7 +27,6 @@ import java.util.List;
import java.util.stream.Collectors;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.segment_file.Sentence;
public class TestUtil {
@@ -58,12 +57,11 @@ public class TestUtil {
* (separated by N_BEST_SEPARATOR), then each translation of the
* n-best list has its own entry in the returned list.
*/
- public static List<String> decodeList(List<String> inputStrings, Decoder decoder,
- JoshuaConfiguration joshuaConfig) {
+ public static List<String> decodeList(List<String> inputStrings, Decoder decoder) {
final List<String> decodedStrings = new ArrayList<>();
for (String inputString : inputStrings) {
- final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
+ final Sentence sentence = new Sentence(inputString, 0, decoder.getDecoderConfig().getFlags());
final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
decodedStrings.addAll(Arrays.asList(nBestList));
}
@@ -79,8 +77,8 @@ public class TestUtil {
* @param joshuaConfig
* @return
*/
- public static String translate(String input, Decoder decoder, JoshuaConfiguration joshuaConfig) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ public static String translate(String input, Decoder decoder) {
+ final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags());
return decoder.decode(sentence).toString();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
index 89f1e81..5394d74 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -25,7 +25,6 @@ import static org.testng.Assert.assertEquals;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureMap;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
@@ -33,22 +32,29 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.google.common.collect.ImmutableMap;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
public class LanguageModelFFTest {
private static final float WEIGHT = 0.5f;
+ private static final Config FF_CONFIG =
+ ConfigFactory.parseMap(
+ ImmutableMap.of(
+ "lm_type", "berkeleylm",
+ "lm_order", "2",
+ "lm_file", "./src/test/resources/lm/berkeley/lm",
+ "state_index", "0"));
private LanguageModelFF ff;
@BeforeMethod
public void setUp() {
Decoder.resetGlobalState();
-
FeatureVector weights = new FeatureVector(2);
weights.put(FeatureMap.hashFeature("lm_0"), WEIGHT);
- String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
-
- JoshuaConfiguration config = new JoshuaConfiguration();
- ff = new LanguageModelFF(weights, args, config);
+ ff = new LanguageModelFF(FF_CONFIG, weights);
}
@AfterMethod
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 2d6aac3..0fbcab6 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -18,15 +18,18 @@
*/
package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+import static org.testng.Assert.assertEquals;
+
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import static org.testng.Assert.assertEquals;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
/**
* Replacement for test/lm/berkeley/test.sh regression test
@@ -37,9 +40,8 @@ public class LMGrammarBerkeleyTest {
private static final String INPUT = "the chat-rooms";
private static final String EXPECTED_OUTPUT = "lm_0=-7.152632 glue_0=-2.000000\n";
private static final String EXPECTED_OUTPUT_WITH_OOV = "lm_0_oov=0.000000 lm_0=-7.152632 glue_0=-2.000000\n";
- private static final String[] OPTIONS = "-v 1 -output-format %f".split(" ");
+ private static final Config DECODER_FLAGS = ConfigFactory.parseString("output_format=%f").withFallback(Decoder.getDefaultFlags());
- private JoshuaConfiguration joshuaConfig;
private Decoder decoder;
@DataProvider(name = "languageModelFiles")
@@ -49,6 +51,11 @@ public class LMGrammarBerkeleyTest {
{"src/test/resources/berkeley_lm/lm.berkeleylm"},
{"src/test/resources/berkeley_lm/lm.berkeleylm.gz"}};
}
+
+ @BeforeMethod
+ public void setUp() {
+ Decoder.resetGlobalState();
+ }
@AfterMethod
public void tearDown() throws Exception {
@@ -57,25 +64,28 @@ public class LMGrammarBerkeleyTest {
@Test(dataProvider = "languageModelFiles")
public void verifyLM(String lmFile) {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.processCommandLineOptions(OPTIONS);
- joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file " + lmFile);
- decoder = new Decoder(joshuaConfig);
+ final Config config = ConfigFactory
+ .parseString(
+ String.format(
+ "feature_functions=[{ class=LanguageModel, lm_type=berkeleylm, lm_order=2, lm_file=%s }]",
+ lmFile))
+ .withFallback(DECODER_FLAGS);
+ decoder = new Decoder(config);
final String translation = decode(INPUT).toString();
assertEquals(translation, EXPECTED_OUTPUT);
}
private Translation decode(String input) {
- final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ final Sentence sentence = new Sentence(input, 0, decoder.getDecoderConfig().getFlags());
return decoder.decode(sentence);
}
@Test
public void givenLmWithOovFeature_whenDecoder_thenCorrectFeaturesReturned() {
- joshuaConfig = new JoshuaConfiguration();
- joshuaConfig.processCommandLineOptions(OPTIONS);
- joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file src/test/resources/berkeley_lm/lm");
- decoder = new Decoder(joshuaConfig);
+ final Config config = ConfigFactory
+ .parseString("feature_functions=[{ class=LanguageModel, oov_feature=true, lm_type=berkeleylm, lm_order=2, lm_file=src/test/resources/berkeley_lm/lm }]")
+ .withFallback(DECODER_FLAGS);
+ decoder = new Decoder(config);
final String translation = decode(INPUT).toString();
assertEquals(translation, EXPECTED_OUTPUT_WITH_OOV);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
index 2067f30..7171b54 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
@@ -23,7 +23,6 @@ import static org.testng.Assert.assertTrue;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureMap;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
@@ -34,27 +33,33 @@ import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.google.common.collect.ImmutableMap;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+
/**
* This unit test relies on KenLM. If the KenLM library is not found when the test is run all tests will be skipped.
*/
public class ClassBasedLanguageModelTest {
private static final float WEIGHT = 0.5f;
+ private static final Config FF_CONFIG =
+ ConfigFactory.parseMap(
+ ImmutableMap.of(
+ "lm_type", "kenlm",
+ "lm_order", "9",
+ "lm_file", "src/test/resources/lm/class_lm/class_lm_9gram.gz",
+ "class_map", "src/test/resources/lm/class_lm/class.map",
+ "state_index", "0"));
private LanguageModelFF ff;
@BeforeMethod
public void setUp() {
Decoder.resetGlobalState();
-
FeatureVector weights = new FeatureVector(1);
weights.put(FeatureMap.hashFeature("lm_0"), WEIGHT);
- String[] args = { "-lm_type", "kenlm", "-lm_order", "9",
- "-lm_file", "src/test/resources/lm/class_lm/class_lm_9gram.gz",
- "-class_map", "src/test/resources/lm/class_lm/class.map" };
-
- JoshuaConfiguration config = new JoshuaConfiguration();
- KenLmTestUtil.Guard(() -> ff = new LanguageModelFF(weights, args, config));
+ KenLmTestUtil.Guard(() -> ff = new LanguageModelFF(FF_CONFIG, weights));
}
@AfterMethod
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
index a09aebb..9af6c32 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
@@ -18,11 +18,13 @@
*/
package org.apache.joshua.decoder.io;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
@@ -47,7 +49,7 @@ import static org.testng.Assert.fail;
public class TranslationRequestStreamTest {
- private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+ private static final Config CONFIG = Decoder.getDefaultFlags();
@BeforeMethod
public void createTranslationRequest() throws Exception {
@@ -82,7 +84,7 @@ public class TranslationRequestStreamTest {
public void testSize_uponConstruction() {
InputStream in = mock(InputStream.class);
TranslationRequestStream request = new TranslationRequestStream(
- new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())), joshuaConfiguration);
+ new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())), CONFIG);
assertEquals(request.size(), 0);
}
@@ -96,7 +98,7 @@ public class TranslationRequestStreamTest {
byte[] data = "1".getBytes();
ByteArrayInputStream input = new ByteArrayInputStream(data);
TranslationRequestStream request = new TranslationRequestStream(
- new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), CONFIG);
request.next();
assertEquals(request.size(), 1);
}
@@ -111,7 +113,7 @@ public class TranslationRequestStreamTest {
byte[] data = "\n".getBytes();
ByteArrayInputStream input = new ByteArrayInputStream(data);
TranslationRequestStream request = new TranslationRequestStream(
- new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), CONFIG);
request.next();
assertEquals(request.size(), 1);
}
@@ -126,7 +128,7 @@ public class TranslationRequestStreamTest {
byte[] data = "\n\n".getBytes();
ByteArrayInputStream input = new ByteArrayInputStream(data);
TranslationRequestStream request = new TranslationRequestStream(
- new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), CONFIG);
request.next();
request.next();
assertEquals(request.size(), 2);
@@ -142,7 +144,7 @@ public class TranslationRequestStreamTest {
byte[] data = "\n\n".getBytes();
ByteArrayInputStream input = new ByteArrayInputStream(data);
TranslationRequestStream request = new TranslationRequestStream(
- new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), CONFIG);
assertEquals(request.next().source(), "");
assertEquals(request.next().source(), "");
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index 172632b..8be422e 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -19,7 +19,6 @@
package org.apache.joshua.decoder.kbest_extraction;
import org.apache.joshua.decoder.Decoder;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.io.KenLmTestUtil;
@@ -38,11 +37,8 @@ import static java.nio.file.Files.readAllBytes;
import static org.testng.Assert.assertEquals;
/**
- * Reimplements the kbest extraction regression test
- * TODO (fhieber): this test strangely only works with StateMinimizing KenLM.
- * This is to be investigated
+ * Reimplements the kbest extraction regression test.
*/
-
public class KBestExtractionTest {
private static final String CONFIG = "src/test/resources/kbest_extraction/joshua.config";
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
index 3b2852c..d2c1b84 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
@@ -20,22 +20,23 @@ package org.apache.joshua.decoder.segment_file;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.AfterMethod;
import static org.testng.Assert.*;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
public class AlmostTooLongSentenceTest {
- private JoshuaConfiguration joshuaConfiguration;
private String almostTooLongInput;
private Sentence sentencePlusTarget;
+ private static final Config FLAGS = Decoder.getDefaultFlags();
@BeforeMethod
public void setUp() {
- joshuaConfiguration = new JoshuaConfiguration();
- almostTooLongInput = concatStrings(".", joshuaConfiguration.maxlen);
- sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0,joshuaConfiguration);
+ almostTooLongInput = concatStrings(".", FLAGS.getInt("maximum_sentence_length"));
+ sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0, FLAGS);
}
@AfterMethod
@@ -44,18 +45,18 @@ public class AlmostTooLongSentenceTest {
@Test
public void testConstructor() {
- Sentence sent = new Sentence("", 0,joshuaConfiguration);
+ Sentence sent = new Sentence("", 0, FLAGS);
assertNotNull(sent);
}
@Test
public void testEmpty() {
- assertTrue(new Sentence("", 0,joshuaConfiguration).isEmpty());
+ assertTrue(new Sentence("", 0, FLAGS).isEmpty());
}
@Test
public void testNotEmpty() {
- assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+ assertFalse(new Sentence("hello , world", 0, FLAGS).isEmpty());
}
/**
@@ -75,12 +76,12 @@ public class AlmostTooLongSentenceTest {
@Test
public void testAlmostButNotTooManyTokensSourceOnlyNotEmpty() {
- assertFalse(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).isEmpty());
+ assertFalse(new Sentence(this.almostTooLongInput, 0, FLAGS).isEmpty());
}
@Test
public void testAlmostButNotTooManyTokensSourceOnlyTargetNull() {
- assertNull(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).target);
+ assertNull(new Sentence(this.almostTooLongInput, 0, FLAGS).target);
}
@Test
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
index 8e0d171..b1d2bed 100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
@@ -18,22 +18,25 @@
*/
package org.apache.joshua.decoder.segment_file;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.testng.annotations.Test;
+
+import com.typesafe.config.Config;
+
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.AfterMethod;
import static org.testng.Assert.*;
+import org.apache.joshua.decoder.Decoder;
+
public class SentenceTest {
private String tooLongInput;
- private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
-
+ private static final Config FLAGS = Decoder.getDefaultFlags();
@BeforeMethod
public void setUp() {
- tooLongInput = concatTokens("*", joshuaConfiguration.maxlen * 2);
+ tooLongInput = concatTokens("*", FLAGS.getInt("maximum_sentence_length") * 2);
}
@AfterMethod
@@ -42,18 +45,18 @@ public class SentenceTest {
@Test
public void testConstructor() {
- Sentence sent = new Sentence("", 0, joshuaConfiguration);
+ Sentence sent = new Sentence("", 0, FLAGS);
assertNotNull(sent);
}
@Test
public void testEmpty() {
- assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+ assertTrue(new Sentence("", 0, FLAGS).isEmpty());
}
@Test
public void testNotEmpty() {
- assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+ assertFalse(new Sentence("hello , world", 0, FLAGS).isEmpty());
}
/**
@@ -79,23 +82,23 @@ public class SentenceTest {
*/
@Test
public void testTooManyTokensSourceTruncated() {
- assertTrue(new Sentence(this.tooLongInput, 0, joshuaConfiguration).length() == 202);
+ assertTrue(new Sentence(this.tooLongInput, 0, FLAGS).length() == 202);
}
@Test
public void testTooManyTokensSourceOnlyNotNull() {
- assertNotNull(new Sentence(this.tooLongInput, 0, joshuaConfiguration));
+ assertNotNull(new Sentence(this.tooLongInput, 0, FLAGS));
}
@Test
public void testTooManyTokensSourceAndTargetIsEmpty() {
- Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+ Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, FLAGS);
assertEquals(sentence.target, "");
}
@Test
public void testTooManyTokensSourceAndTargetTruncated() {
- Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+ Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, FLAGS);
assertTrue(sentence.length() == 202);
}
@@ -103,7 +106,7 @@ public class SentenceTest {
public void testClearlyNotTooManyTokens() {
// Concatenate MAX_SENTENCE_NODES, each shorter than the average length, joined by a space.
String input = "token";
- assertFalse(new Sentence(input, 0, joshuaConfiguration).isEmpty());
+ assertFalse(new Sentence(input, 0, FLAGS).isEmpty());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
index 1522120..1c351c0 100644
--- a/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
@@ -21,10 +21,12 @@ package org.apache.joshua.lattice;
import java.util.ArrayList;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Decoder;
import org.testng.Assert;
import org.testng.annotations.Test;
+import com.typesafe.config.Config;
+
/**
* Unit tests for Lattice class.
*
@@ -34,6 +36,8 @@ import org.testng.annotations.Test;
*/
@Test(groups = { "lattice" })
public class LatticeTest {
+
+ private static final Config FLAGS = Decoder.getDefaultFlags();
@Test
public void allPairsShortestPath() {
@@ -49,7 +53,7 @@ public class LatticeTest {
nodes.get(2).addArc(nodes.get(3), (float) 3.0, "b");
nodes.get(2).addArc(nodes.get(3), (float) 5.0, "c");
- Lattice<String> graph = new Lattice<String>(nodes, new JoshuaConfiguration());
+ Lattice<String> graph = new Lattice<String>(nodes, FLAGS);
Assert.assertEquals(graph.getShortestPath(0, 1), 1);
Assert.assertEquals(graph.getShortestPath(0, 2), 1);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java b/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
deleted file mode 100644
index 7c4fc80..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.nio.IntBuffer;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
-import java.util.Random;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This program runs a little benchmark to check reading speed on various data
- * representations.
- *
- * Usage: java Benchmark PACKED_GRAMMAR_DIR TIMES
- */
-
-public class Benchmark implements AutoCloseable{
-
- private static final Logger LOG = LoggerFactory.getLogger(Benchmark.class);
-
- private IntBuffer intBuffer;
- private MappedByteBuffer byteBuffer;
- private int[] intArray;
- private final FileInputStream fin;
-
- public Benchmark(String dir) throws IOException {
- File file = new File(dir + "/slice_00000.source");
- this.fin = new FileInputStream(file);
- FileChannel source_channel = this.fin.getChannel();
- int byte_size = (int) source_channel.size();
- int int_size = byte_size / 4;
-
- byteBuffer = source_channel.map(MapMode.READ_ONLY, 0, byte_size);
- intBuffer = byteBuffer.asIntBuffer();
-
- intArray = new int[int_size];
- intBuffer.get(intArray);
- }
-
- public void benchmark(int times) {
- LOG.info("Beginning benchmark.");
-
- Random r = new Random();
- r.setSeed(1234567890);
- int[] positions = new int[1000];
- for (int i = 0; i < positions.length; i++)
- positions[i] = r.nextInt(intArray.length);
-
- long sum;
-
- long start_time = System.currentTimeMillis();
-
- sum = 0;
- for (int t = 0; t < times; t++)
- for (int i = 0; i < positions.length; i++)
- sum += byteBuffer.getInt(positions[i] * 4);
- LOG.info("Sum: {}", sum);
- long byte_time = System.currentTimeMillis();
-
- sum = 0;
- for (int t = 0; t < times; t++)
- for (int i = 0; i < positions.length; i++)
- sum += intBuffer.get(positions[i]);
- LOG.info("Sum: {}", sum);
- long int_time = System.currentTimeMillis();
-
- sum = 0;
- for (int t = 0; t < times; t++)
- for (int i = 0; i < positions.length; i++)
- sum += intArray[positions[i]];
- LOG.info("Sum: {}", sum);
- long array_time = System.currentTimeMillis();
-
- sum = 0;
- for (int t = 0; t < times; t++)
- for (int i = 0; i < (intArray.length / 8); i++)
- sum += intArray[i * 6] + intArray[i * 6 + 2];
- LOG.info("Sum: {}", sum);
- long mult_time = System.currentTimeMillis();
-
- sum = 0;
- for (int t = 0; t < times; t++) {
- int index = 0;
- for (int i = 0; i < (intArray.length / 8); i++) {
- sum += intArray[index] + intArray[index + 2];
- index += 6;
- }
- }
- LOG.info("Sum: {}", sum);
- long add_time = System.currentTimeMillis();
-
- LOG.info("ByteBuffer: {}", (byte_time - start_time));
- LOG.info("IntBuffer: {}", (int_time - byte_time));
- LOG.info("Array: {}", (array_time - int_time));
- LOG.info("Multiply: {}", (mult_time - array_time));
- LOG.info("Add: {}", (add_time - mult_time));
- }
-
- public static void main(String args[]) throws IOException {
- try (Benchmark pr = new Benchmark(args[0]);) {
- pr.benchmark( Integer.parseInt(args[1]));
- }
- }
-
- @Override
- public void close() throws IOException {
- this.fin.close();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java b/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
deleted file mode 100644
index 5ada5ab..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
-
-import org.apache.joshua.corpus.Vocabulary;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
-/**
- * This program reads a packed representation and prints out some
- * basic information about it.
- *
- * Usage: java CountRules PACKED_GRAMMAR_DIR
- */
-
-public class CountRules {
-
- public static void main(String args[]) {
-
- String dir = args[0];
-
- File file = new File(dir + "/chunk_00000.source");
- FileInputStream stream = null;
- FileChannel channel = null;
- try {
- // read the vocabulary
- Vocabulary.read(new File(dir + "/vocabulary"));
-
- // get the channel etc
- stream = new FileInputStream(file);
- channel = stream.getChannel();
- int size = (int) channel.size();
-
- MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, 0, size);
- // byte[] bytes = new bytes[size];
- // buffer.get(bytes);
-
- // read the number of rules
- int numRules = buffer.getInt();
- System.out.println(String.format("There are %d source sides at the root", numRules));
-
- // read the first symbol and its offset
- for (int i = 0; i < numRules; i++) {
- // String symbol = Vocabulary.word(buffer.getInt());
- int symbol = buffer.getInt();
- String string = Vocabulary.word(symbol);
- int offset = buffer.getInt();
- System.out.println(String.format("-> %s/%d [%d]", string, symbol, offset));
- }
-
- } catch (IOException e) {
-
- e.printStackTrace();
-
- } finally {
- try {
- if (stream != null)
- stream.close();
-
- if (channel != null)
- channel.close();
-
- } catch (IOException e) {
-
- e.printStackTrace();
-
- }
- }
-
-
- // // Read in the bytes
- // int offset = 0;
- // int numRead = 0;
- // while (offset < bytes.length
- // && (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
- // offset += numRead;
- // }
-
- // // Ensure all the bytes have been read in
- // if (offset < bytes.length) {
- // throw new IOException("Could not completely read file "+file.getName());
- // }
-
- // // Close the input stream and return bytes
- // is.close();
- // return bytes;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java b/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
deleted file mode 100644
index af6507f..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.nio.IntBuffer;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.nio.channels.FileChannel.MapMode;
-
-import org.apache.joshua.util.quantization.Quantizer;
-import org.apache.joshua.util.quantization.QuantizerConfiguration;
-import org.apache.joshua.corpus.Vocabulary;
-
-/**
- * This program reads a packed representation and prints out some basic
- * information about it.
- *
- * Usage: java PrintRules PACKED_GRAMMAR_DIR
- */
-
-public class PrintRules {
-
- private QuantizerConfiguration quantization;
-
- private int[] source;
- private int[] target;
- private MappedByteBuffer features;
- private MappedByteBuffer alignments;
-
- private int[] featureLookup;
- private int[] alignmentLookup;
-
- private boolean have_alignments;
-
- public PrintRules(String dir) throws IOException {
- File source_file = new File(dir + "/slice_00000.source");
- File target_file = new File(dir + "/slice_00000.target");
- File feature_file = new File(dir + "/slice_00000.features");
- File alignment_file = new File(dir + "/slice_00000.alignments");
-
- have_alignments = alignment_file.exists();
-
- // Read the vocabulary.
- Vocabulary.read(new File(dir + "/vocabulary"));
-
- // Read the quantizer setup.
- quantization = new QuantizerConfiguration();
- quantization.read(dir + "/quantization");
-
- // Get the channels etc.
- @SuppressWarnings("resource")
- FileChannel source_channel = new FileInputStream(source_file).getChannel();
- int source_size = (int) source_channel.size();
- IntBuffer source_buffer = source_channel.map(MapMode.READ_ONLY, 0,
- source_size).asIntBuffer();
- source = new int[source_size / 4];
- source_buffer.get(source);
-
- @SuppressWarnings("resource")
- FileChannel target_channel = new FileInputStream(target_file).getChannel();
- int target_size = (int) target_channel.size();
- IntBuffer target_buffer = target_channel.map(MapMode.READ_ONLY, 0,
- target_size).asIntBuffer();
- target = new int[target_size / 4];
- target_buffer.get(target);
-
- @SuppressWarnings("resource")
- FileChannel feature_channel = new FileInputStream(feature_file).getChannel();
- int feature_size = (int) feature_channel.size();
- features = feature_channel.map(MapMode.READ_ONLY, 0, feature_size);
-
- if (have_alignments) {
- @SuppressWarnings("resource")
- FileChannel alignment_channel = new FileInputStream(alignment_file).getChannel();
- int alignment_size = (int) alignment_channel.size();
- alignments = alignment_channel.map(MapMode.READ_ONLY, 0, alignment_size);
- }
-
- int num_feature_blocks = features.getInt();
- featureLookup = new int[num_feature_blocks];
- // Read away data size.
- features.getInt();
- for (int i = 0; i < num_feature_blocks; i++)
- featureLookup[i] = features.getInt();
-
- int num_alignment_blocks = alignments.getInt();
- alignmentLookup = new int[num_alignment_blocks];
- // Read away data size.
- alignments.getInt();
- for (int i = 0; i < num_alignment_blocks; i++)
- alignmentLookup[i] = alignments.getInt();
-
- if (num_alignment_blocks != num_feature_blocks)
- throw new RuntimeException("Number of blocks doesn't match up.");
- }
-
- public void traverse() {
- traverse(0, "");
- }
-
- private void traverse(int position, String src_side) {
- int num_children = source[position];
- int[] addresses = new int[num_children];
- int[] symbols = new int[num_children];
- int j = position + 1;
- for (int i = 0; i < num_children; i++) {
- symbols[i] = source[j++];
- addresses[i] = source[j++];
- }
- int num_rules = source[j++];
- for (int i = 0; i < num_rules; i++) {
- int lhs = source[j++];
- int tgt_address = source[j++];
- int data_address = source[j++];
- printRule(src_side, lhs, tgt_address, data_address);
- }
- for (int i = 0; i < num_children; i++) {
- traverse(addresses[i], src_side + " " + Vocabulary.word(symbols[i]));
- }
- }
-
- private String getTarget(int pointer) {
- StringBuilder sb = new StringBuilder();
- do {
- pointer = target[pointer];
- if (pointer != -1) {
- int symbol = target[pointer + 1];
- if (symbol < 0)
- sb.append(" ").append("NT" + symbol);
- else
- sb.append(" ").append(Vocabulary.word(symbol));
- }
- } while (pointer != -1);
- return sb.toString();
- }
-
- private String getFeatures(int block_id) {
- StringBuilder sb = new StringBuilder();
-
- int data_position = featureLookup[block_id];
- int num_features = features.getInt(data_position);
- data_position += 4;
- for (int i = 0; i < num_features; i++) {
- int feature_id = features.getInt(data_position);
- Quantizer quantizer = quantization.get(feature_id);
- sb.append(" " + Vocabulary.word(feature_id) + "=" +
- quantizer.read(features, data_position));
- data_position += 4 + quantizer.size();
- }
- return sb.toString();
- }
-
- private String getAlignments(int block_id) {
- StringBuilder sb = new StringBuilder();
-
- int data_position = alignmentLookup[block_id];
- byte num_points = alignments.get(data_position);
- for (int i = 0; i < num_points; i++) {
- byte src = alignments.get(data_position + 1 + 2 * i);
- byte tgt = alignments.get(data_position + 2 + 2 * i);
-
- sb.append(" " + src + "-" + tgt);
- }
- return sb.toString();
- }
-
- private void printRule(String src_side, int lhs, int tgt_address,
- int data_address) {
- System.out.println(Vocabulary.word(lhs) + " |||" +
- src_side + " |||" +
- getTarget(tgt_address) + " |||" +
- getFeatures(data_address) +
- (have_alignments ? " |||" + getAlignments(data_address) : ""));
- }
-
- public static void main(String args[]) throws IOException {
- PrintRules pr = new PrintRules(args[0]);
- pr.traverse();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/README
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/README b/joshua-core/src/test/java/org/apache/joshua/packed/README
deleted file mode 100644
index 3cb52b8..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/README
+++ /dev/null
@@ -1,6 +0,0 @@
-# This code generates the packed grammar representation from the grammar file
-rm -rf small_packed
-java -cp /home/hltcoe/mpost/code/joshua/bin:. joshua.tools.GrammarPacker packer.config small_packed small_grammar
-
-# This compiles and reads the grammar file
-java -cp $JOSHUA/bin:. CountRules small_packed
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java b/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
deleted file mode 100644
index 523df4c..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.joshua.corpus.Vocabulary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class VocabTest {
-
- private static final Logger LOG = LoggerFactory.getLogger(VocabTest.class);
-
- //FIXME: no main() in automated test case,
- public static void main(String args[]) {
-
- int numWords = 0;
- try {
- String dir = args[0];
-
- boolean read = Vocabulary.read(new File(dir + "/vocabulary"));
- if (! read) {
- System.err.println("VocabTest: Failed to read the vocabulary.");
- System.exit(1);
- }
-
- int id = 0;
- while (Vocabulary.hasId(id)) {
- String word = Vocabulary.word(id);
- System.out.println(String.format("VOCAB: %d\t%s", id, word));
- numWords++;
- id++;
- }
- } catch (IOException e) {
- LOG.error(e.getMessage(), e);
- }
-
- System.out.println("read " + numWords + " words");
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/26dcdb67/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/packer.config b/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
deleted file mode 100644
index 73edb1a..0000000
--- a/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
+++ /dev/null
@@ -1,6 +0,0 @@
-#chunk_size 30000
-chunk_size 2500000
-
-quantizer boolean Abstract,Adjacent,ContainsX,GlueRule,Lexical,Monotonic,TargetTerminalsButNoSource
-quantizer float LexprobSourceGivenTarget,LexprobTargetGivenSource,PhrasePenalty,RarityPenalty,SourcePhraseGivenTarget,SourceTerminalsButNoTarget,TargetPhraseGivenSource
-quantizer byte TargetWords