You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/17 10:32:00 UTC
[19/56] [partial] incubator-joshua git commit: maven multi-module
layout 1st commit: moving files into joshua-core
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
new file mode 100644
index 0000000..7e4c2cc
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/ZMERT.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.zmert;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.FileUtility;
+
+public class ZMERT {
+ /**
+  * Command-line entry point for Z-MERT.
+  *
+  * <p>Accepted argument layouts:
+  * <ul>
+  *   <li>{@code -h}: print the detailed usage text and exit with status 2;</li>
+  *   <li>one argument (a config file name): run MERT inside this JVM through {@code MertCore};</li>
+  *   <li>three arguments: run every MERT iteration in a freshly spawned JVM, where
+  *       {@code args[1]} is the child's heap limit in MB and {@code args[2]} is the config
+  *       file ({@code args[0]} is not inspected).</li>
+  * </ul>
+  * Any other argument count prints the short usage text and exits with status 1.
+  *
+  * @param args command-line arguments as described above
+  * @throws Exception whatever {@code MertCore} propagates, a {@link NumberFormatException} if
+  *     {@code args[1]} is not an integer, or an I/O or interruption failure while running a
+  *     child JVM
+  */
+ public static void main(String[] args) throws Exception {
+   boolean external = false; // should each MERT iteration be launched externally?
+
+   if (args.length == 1) {
+     if (args[0].equals("-h")) {
+       printZMERTUsage(args.length, true);
+       System.exit(2);
+     }
+   } else if (args.length == 3) {
+     external = true;
+   } else {
+     printZMERTUsage(args.length, false);
+     System.exit(1);
+   }
+
+   if (!external) {
+     JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+     MertCore myMert = new MertCore(args[0], joshuaConfiguration);
+     myMert.run_MERT(); // optimize lambda[]!!!
+     myMert.finish();
+   } else {
+     int maxMem = Integer.parseInt(args[1]);
+     String configFileName = args[2];
+     String stateFileName = FileUtility.dirname(configFileName) + "/ZMERT.temp.state";
+     String cp = System.getProperty("java.class.path");
+     boolean done = false;
+     int iteration = 0;
+     while (!done) {
+       ++iteration;
+
+       // Pass the command as an explicit argument list so that a class path or file name
+       // containing whitespace is not split into several arguments.
+       ProcessBuilder builder = new ProcessBuilder("java", "-Xmx" + maxMem + "m", "-cp", cp,
+           "org.apache.joshua.zmert.MertCore", configFileName, stateFileName,
+           Integer.toString(iteration));
+
+       // Merge stderr into stdout so one reader drains everything the child writes. Reading
+       // the two pipes one after the other can deadlock as soon as the child fills the pipe
+       // that is not currently being read.
+       builder.redirectErrorStream(true);
+       Process p = builder.start();
+
+       try (BufferedReader childOutput =
+           new BufferedReader(new InputStreamReader(p.getInputStream()))) {
+         String line;
+         while ((line = childOutput.readLine()) != null) {
+           System.out.println(line);
+         }
+       }
+       int status = p.waitFor();
+
+       // Exit status 90 ends the loop, 91 requests another iteration; anything else is
+       // treated as a failure and becomes this process's exit status.
+       if (status == 90) {
+         done = true;
+       } else if (status == 91) {
+         done = false;
+       } else {
+         System.out.println("Z-MERT exiting prematurely (MertCore returned " + status + ")...");
+         System.exit(status);
+       }
+     }
+   }
+
+   System.exit(0);
+
+ } // main(String[] args)
+
+ /**
+  * Prints Z-MERT's command-line help to standard output.
+  *
+  * @param argsLen number of arguments the user supplied; echoed only in the short message
+  * @param detailed {@code true} to list every config parameter with its default value,
+  *     {@code false} to print the short message that reports the argument count
+  */
+ public static void printZMERTUsage(int argsLen, boolean detailed) {
+   if (!detailed) {
+     println("Oops, you provided " + argsLen + " args!");
+     println("");
+     println("Usage:");
+     println(" ZMERT -maxMem maxMemoryInMB MERT_configFile");
+     println("");
+     println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
+     println("allowed to use when performing its calculations (no memory is needed while");
+     println("the decoder is running),");
+     println("and the config file contains any subset of Z-MERT's 20-some parameters,");
+     println("one per line. Run ZMERT -h for more details on those parameters.");
+   } else {
+     println("Usage:");
+     println(" ZMERT -maxMem maxMemoryInMB MERT_configFile");
+     println("");
+     println("Where -maxMem specifies the maximum amount of memory (in MB) Z-MERT is");
+     println("allowed to use when performing its calculations (no memory is needed while");
+     println("the decoder is running),");
+     println("and the config file contains any subset of Z-MERT's 20-some parameters,");
+     println("one per line. Those parameters, and their default values, are:");
+     println("");
+     println("Relevant files:");
+     println(" -dir dirPrefix: working directory\n [[default: null string (i.e. they are in the current directory)]]");
+     println(" -s sourceFile: source sentences (foreign sentences) of the MERT dataset\n [[default: null string (i.e. file name is not needed by MERT)]]");
+     println(" -r refFile: target sentences (reference translations) of the MERT dataset\n [[default: reference.txt]]");
+     println(" -rps refsPerSen: number of reference translations per sentence\n [[default: 1]]");
+     println(" -txtNrm textNormMethod: how should text be normalized?\n (0) don't normalize text,\n or (1) \"NIST-style\", and also rejoin 're, *'s, n't, etc,\n or (2) apply 1 and also rejoin dashes between letters,\n or (3) apply 1 and also drop non-ASCII characters,\n or (4) apply 1+2+3\n [[default: 1]]");
+     println(" -p paramsFile: file containing parameter names, initial values, and ranges\n [[default: params.txt]]");
+     println(" -docInfo documentInfoFile: file informing Z-MERT which document each\n sentence belongs to\n [[default: null string (i.e. all sentences are in one 'document')]]");
+     println(" -fin finalLambda: file name for final lambda[] values\n [[default: null string (i.e. no such file will be created)]]");
+     println("");
+     println("MERT specs:");
+     println(" -m metricName metric options: name of evaluation metric and its options\n [[default: BLEU 4 closest]]");
+     println(" -maxIt maxMERTIts: maximum number of MERT iterations\n [[default: 20]]");
+     println(" -prevIt prevMERTIts: maximum number of previous MERT iterations to\n construct candidate sets from\n [[default: 20]]");
+     println(" -minIt minMERTIts: number of iterations before considering an early exit\n [[default: 5]]");
+     println(" -stopIt stopMinIts: some early stopping criterion must be satisfied in\n stopMinIts *consecutive* iterations before an early exit\n [[default: 3]]");
+     println(" -stopSig sigValue: early MERT exit if no weight changes by more than sigValue\n [[default: -1 (i.e. this criterion is never investigated)]]");
+     println(" -thrCnt threadCount: number of threads to run in parallel when optimizing\n [[default: 1]]");
+     println(" -save saveInter: save intermediate cfg files (1) or decoder outputs (2)\n or both (3) or neither (0)\n [[default: 3]]");
+     println(" -compress compressFiles: should Z-MERT compress the files it produces (1)\n or not (0)\n [[default: 0]]");
+     println(" -ipi initsPerIt: number of intermediate initial points per iteration\n [[default: 20]]");
+     println(" -opi oncePerIt: modify a parameter only once per iteration (1) or not (0)\n [[default: 0]]");
+     println(" -rand randInit: choose initial point randomly (1) or from paramsFile (0)\n [[default: 0]]");
+     println(" -seed seed: seed used to initialize random number generator\n [[default: time (i.e. value returned by System.currentTimeMillis()]]");
+     // println(" -ud useDisk: reliance on disk (0-2; higher value => more reliance)\n [[default: 2]]");
+     println("");
+     println("Decoder specs:");
+     println(" -cmd commandFile: name of file containing commands to run the decoder\n [[default: null string (i.e. decoder is a JoshuaDecoder object)]]");
+     println(" -passIt passIterationToDecoder: should iteration number be passed\n to command file (1) or not (0)\n [[default: 0]]");
+     println(" -decOut decoderOutFile: name of the output file produced by the decoder\n [[default: output.nbest]]");
+     println(" -decExit validExit: value returned by decoder to indicate success\n [[default: 0]]");
+     println(" -dcfg decConfigFile: name of decoder config file\n [[default: dec_cfg.txt]]");
+     println(" -N N: size of N-best list (per sentence) generated in each MERT iteration\n [[default: 100]]");
+     println("");
+     println("Output specs:");
+     println(" -v verbosity: Z-MERT verbosity level (0-2; higher value => more verbose)\n [[default: 1]]");
+     println(" -decV decVerbosity: should decoder output be printed (1) or ignored (0)\n [[default: 0]]");
+     println("");
+   }
+ }
+
+ private static void println(Object obj) {
+ System.out.println(obj);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
new file mode 100644
index 0000000..571b524
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/zmert/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Provides code for performing minimum error rate training.
+ * Much of the code in this package is based on Och (2003).
+ * A deeper description of the algorithm is in Zaidan (2009).
+ */
+package org.apache.joshua.zmert;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/resources/log4j.properties b/joshua-core/src/main/resources/log4j.properties
new file mode 100644
index 0000000..acca5e9
--- /dev/null
+++ b/joshua-core/src/main/resources/log4j.properties
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# log4j settings
+log4j.rootLogger=WARN, stdout
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.Target=System.err
+log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/overview.html
----------------------------------------------------------------------
diff --git a/joshua-core/src/overview.html b/joshua-core/src/overview.html
new file mode 100644
index 0000000..7efe5b3
--- /dev/null
+++ b/joshua-core/src/overview.html
@@ -0,0 +1,41 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head></head>
+<body bgcolor="white">
+
+<!--
+##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
+##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
+##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
+-->
+
+Apache Joshua is an extensible, open source statistical
+hierarchical phrase-based machine translation system.
+
+<!--
+<h2>Related Documentation</h2>
+-->
+
+<!-- Put @see and @since tags down here. -->
+
+@see <a href="http://joshua.incubator.apache.org/">Joshua Website</a>
+
+
+</body>
+</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
new file mode 100644
index 0000000..70fe67a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/SpanTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ *
+ *
+ * @author Lane Schwartz
+ */
+public class SpanTest {
+
+  /**
+   * Walks a span constructed as {@code new Span(1, 10)} and checks that every value produced
+   * by its iterator is one greater than the previous one, starting from 1.
+   */
+  @Test
+  public void iterator() {
+    final Span oneToTen = new Span(1, 10);
+
+    int next = 1;
+    for (int value : oneToTen) {
+      Assert.assertEquals(value, next++);
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
new file mode 100644
index 0000000..e3042ed
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/corpus/VocabularyTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus;
+
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import static org.apache.joshua.util.FormatUtils.isNonterminal;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+
+import java.io.File;
+import java.io.IOException;
+
+public class VocabularyTest { // Tests for the static Vocabulary word <-> id mapping.
+ private static final String WORD1 = "word1";
+ private static final String WORD2 = "word2";
+ private static final String NON_TERMINAL = "[X]"; // bracketed symbol; asserted below to be a nonterminal
+ private static final String GOAL = "[GOAL]";
+
+ @BeforeMethod
+ public void init() { // runs before every test: start from a cleared vocabulary
+ Vocabulary.clear();
+ }
+
+ @AfterMethod
+ public void deinit() { // runs after every test: do not leak entries into other tests
+ Vocabulary.clear();
+ }
+
+ @Test
+ public void givenVocabulary_whenEmpty_thenOnlyContainsUnknownWord() { // a cleared vocabulary has exactly one entry
+ assertTrue(Vocabulary.hasId(Vocabulary.UNKNOWN_ID));
+ assertFalse(Vocabulary.hasId(1));
+ assertFalse(Vocabulary.hasId(-1));
+ assertEquals(Vocabulary.UNKNOWN_WORD, Vocabulary.word(Vocabulary.UNKNOWN_ID));
+ assertEquals(1, Vocabulary.size());
+ }
+
+ @Test
+ public void givenVocabulary_whenNewWord_thenMappingIsAdded() { // id() registers an unseen word and is stable afterwards
+ final int FIRST_WORD_ID = 1;
+ assertFalse(Vocabulary.hasId(FIRST_WORD_ID));
+ assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
+ //should return same id after second call:
+ assertEquals(FIRST_WORD_ID, Vocabulary.id(WORD1));
+ assertTrue(Vocabulary.hasId(FIRST_WORD_ID));
+ assertEquals(WORD1, Vocabulary.word(FIRST_WORD_ID));
+ assertEquals(2, Vocabulary.size()); // unknown word + WORD1
+ }
+
+ @Test
+ public void givenVocabulary_whenCheckingStringInBracketsOrNegativeNumber_thenIsNonTerminal() { // exercises both the String and the int overload of isNonterminal
+ //non-terminals
+ assertTrue(isNonterminal(NON_TERMINAL));
+ //terminals
+ assertFalse(isNonterminal(WORD1));
+ assertFalse(isNonterminal("[]")); // brackets with nothing between them do not count
+ assertFalse(isNonterminal("["));
+ assertFalse(isNonterminal("]"));
+ assertFalse(isNonterminal(""));
+
+ //negative numbers indicate non-terminals
+ assertTrue(isNonterminal(-1));
+ assertTrue(isNonterminal(-5));
+
+ //positive numbers indicate terminals:
+ assertFalse(isNonterminal(0)); // zero is asserted to be a terminal as well
+ assertFalse(isNonterminal(5));
+
+
+ }
+
+ @Test
+ public void givenVocabulary_whenNonTerminal_thenReturnsStrictlyPositiveNonTerminalIndices() { // NOTE(review): the asserted ids are order-dependent; keep the id() calls in this sequence
+ final int FIRST_NON_TERMINAL_INDEX = 1;
+ assertTrue(Vocabulary.id(NON_TERMINAL) < 0); // id() reports nonterminals as negative numbers
+ assertTrue(Vocabulary.hasId(FIRST_NON_TERMINAL_INDEX)); // hasId is asserted for both signs of the index
+ assertTrue(Vocabulary.hasId(-FIRST_NON_TERMINAL_INDEX));
+
+ assertTrue(Vocabulary.id("") > 0);
+ assertTrue(Vocabulary.id(WORD1) > 0);
+
+ final int SECOND_NON_TERMINAL_INDEX = 4; // presumably 4 because "" and WORD1 took 2 and 3 above - TODO confirm
+ assertTrue(Vocabulary.id(GOAL) < 0);
+ assertTrue(Vocabulary.hasId(SECOND_NON_TERMINAL_INDEX));
+ assertTrue(Vocabulary.hasId(-SECOND_NON_TERMINAL_INDEX));
+
+ assertTrue(Vocabulary.id(WORD2) > 0);
+ }
+
+ @Test
+ public void givenVocabulary_whenWritenAndReading_thenVocabularyStaysTheSame() throws IOException { // write/clear/read round trip must keep every id
+ File vocabFile = File.createTempFile( "vocab", "tmp");
+ vocabFile.deleteOnExit();
+
+ int id1 = Vocabulary.id(WORD1);
+ int id2 = Vocabulary.id(NON_TERMINAL);
+ int id3 = Vocabulary.id(WORD2);
+
+ Vocabulary.write(vocabFile.getAbsolutePath());
+
+ Vocabulary.clear(); // drop the in-memory state so the assertions below only see what read() restores
+
+ Vocabulary.read(vocabFile);
+
+ assertEquals(4, Vocabulary.size()); //unknown word + 3 other words
+ assertTrue(Vocabulary.hasId(id1));
+ assertTrue(Vocabulary.hasId(id2));
+ assertTrue(Vocabulary.hasId(id3));
+ assertEquals(id1, Vocabulary.id(WORD1));
+ assertEquals(id2, Vocabulary.id(NON_TERMINAL));
+ assertEquals(id3, Vocabulary.id(WORD2));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
new file mode 100644
index 0000000..439e486
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/ArpaFileTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.lm.buildin_lm.TrieLM;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for testing ARPA language model class.
+ *
+ * @author Lane Schwartz
+ */
+public class ArpaFileTest {
+
+  /** Absolute path of the temporary ARPA file written by {@link #setup()}. */
+  String arpaFileName;
+
+  /** Vocabulary handed to every {@code ArpaFile} these tests construct. */
+  Vocabulary vocab;
+
+  /**
+   * Registers the eight test words and writes a small trigram model in ARPA format to a
+   * temporary file. It is declared as a test so that the other methods can run after it
+   * through {@code dependsOnMethods}.
+   */
+  @Test
+  public void setup() {
+
+    vocab = new Vocabulary();
+    vocab.id("a");
+    vocab.id("because");
+    vocab.id("boycott");
+    vocab.id("of");
+    vocab.id("parliament");
+    vocab.id("potato");
+    vocab.id("resumption");
+    vocab.id("the");
+
+    try {
+      File file = File.createTempFile("testLM", "arpa");
+      // Do not leave the fixture behind in the temp directory once the JVM exits.
+      file.deleteOnExit();
+
+      // try-with-resources closes the stream even if one of the writes throws.
+      try (PrintStream out = new PrintStream(file, "UTF-8")) {
+
+        out.println();
+        out.println("\\data\\");
+        out.println("ngram 1=8");
+        // NOTE(review): the header declares 4 bigrams, but five are written below and the
+        // tests in this class expect five. Confirm whether the mismatch is intentional.
+        out.println("ngram 2=4");
+        out.println("ngram 3=1");
+        out.println();
+
+        out.println("\\1-grams:");
+        out.println("-1.992672 a -0.1195484");
+        out.println("-2.713723 because -0.4665429");
+        out.println("-4.678545 boycott -0.0902521");
+        out.println("-1.609573 of -0.1991907");
+        out.println("-3.875917 parliament -0.1274891");
+        out.println("-9.753210 potato");
+        out.println("-4.678545 resumption -0.07945678");
+        out.println("-1.712444 the -0.1606644");
+
+        out.println();
+        out.println("\\2-grams:");
+        out.println("-0.3552987 because of -0.03083654");
+        out.println("-1.403534 of a");
+        out.println("-0.7507797 of the -0.05237135");
+        out.println("-0.7266324 resumption of");
+        out.println("-3.936147 the resumption");
+
+        out.println();
+        out.println("\\3-grams:");
+        out.println("-0.6309999 because of the");
+        out.println();
+
+        out.println("\\end\\");
+      }
+
+      this.arpaFileName = file.getAbsolutePath();
+
+    } catch (IOException e) {
+      Assert.fail("Unable to create temporary file: " + e.toString());
+    }
+
+  }
+
+  /** The fixture contains n-grams up to length three, so the reported order must be 3. */
+  @Test(dependsOnMethods = { "setup" })
+  public void testOrder() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    try {
+      Assert.assertEquals(arpaFile.getOrder(), 3);
+    } catch (FileNotFoundException e) {
+      Assert.fail(e.toString());
+    }
+  }
+
+  /** Iterating the file must yield 8 unigrams, 5 bigrams and 1 trigram. */
+  @Test(dependsOnMethods = { "setup" })
+  public void testIteration() {
+
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    // Number of n-grams seen so far, keyed by n-gram order.
+    Map<Integer, Integer> counts = new HashMap<>();
+
+    boolean iterationOccurred = false;
+
+    for (ArpaNgram ngram : arpaFile) {
+
+      iterationOccurred = true;
+
+      int order = ngram.order();
+      Integer seen = counts.get(order);
+      counts.put(order, seen == null ? 1 : seen + 1);
+
+    }
+
+    Assert.assertTrue(iterationOccurred);
+
+    Assert.assertTrue(counts.containsKey(1));
+    Assert.assertTrue(counts.containsKey(2));
+    Assert.assertTrue(counts.containsKey(3));
+
+    Assert.assertEquals((int) counts.get(1), 8);
+    Assert.assertEquals((int) counts.get(2), 5);
+    Assert.assertEquals((int) counts.get(3), 1);
+
+  }
+
+  /** The total size must equal the 8 + 5 + 1 n-grams written by {@link #setup()}. */
+  @Test(dependsOnMethods = { "setup" })
+  public void testSize() {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    Assert.assertEquals(arpaFile.size(), 14);
+  }
+
+  /** A trie built from the fixture must have at least one child. */
+  @Test(dependsOnMethods = { "setup", "testIteration" })
+  public void testChildren() throws FileNotFoundException {
+    ArpaFile arpaFile = new ArpaFile(arpaFileName, vocab);
+
+    TrieLM lm = new TrieLM(arpaFile);
+    // Compare by value: assertNotSame on autoboxed ints checks object identity and only
+    // gave the intended answer because of the Integer cache.
+    Assert.assertTrue(lm.getChildren().size() > 0, "TrieLM should have at least one child");
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
new file mode 100644
index 0000000..5e71352
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.joshua.decoder.ff.lm;
+
+import static org.hamcrest.CoreMatchers.*;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+public class LanguageModelFFTest {
+
+  /** Weight assigned to the {@code lm_0} feature in {@link #setUp()}. */
+  private static final float WEIGHT = 0.5f;
+
+  /** Feature function under test; rebuilt before every test method. */
+  private LanguageModelFF ff;
+
+  /** Resets the decoder's global state and builds an order-2 BerkeleyLM feature function. */
+  @BeforeMethod
+  public void setUp() {
+    Decoder.resetGlobalState();
+
+    FeatureVector weights = new FeatureVector();
+    weights.set("lm_0", WEIGHT);
+    String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
+
+    JoshuaConfiguration config = new JoshuaConfiguration();
+    ff = new LanguageModelFF(weights, args, config);
+  }
+
+  /** Clears the decoder's global state again so that later tests start clean. */
+  @AfterMethod
+  public void tearDown() {
+    Decoder.resetGlobalState();
+  }
+
+  // TestNG's assertEquals takes (actual, expected, delta); every assertion below follows that
+  // order so that a failure message labels the two values correctly.
+
+  /** For a single non-start word, the future cost is the LM score times the weight. */
+  @Test
+  public void givenNonStartSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int[] left = {3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.getLM().sentenceLogProbability(left, 2, 1);
+    assertEquals(score, -99.0f, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(cost, score * WEIGHT, 0.0f);
+  }
+
+  /** A state holding only the start symbol scores zero, and so does its future cost. */
+  @Test
+  public void givenOnlyStartSymbol_whenEstimateFutureCost_thenZeroResult() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    int[] left = {startSymbolId};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.getLM().sentenceLogProbability(left, 2, 2);
+    assertEquals(score, 0.0f, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(cost, score * WEIGHT, 0.0f);
+  }
+
+  /** For the start symbol plus one word, the future cost is the LM score times the weight. */
+  @Test
+  public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
+    int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
+    // Word id 3 is used as the "other" word below, so it must differ from the start symbol.
+    assertThat(startSymbolId, not(equalTo(3)));
+    int[] left = {startSymbolId, 3};
+    NgramDPState currentState = new NgramDPState(left, new int[left.length]);
+
+    float score = ff.getLM().sentenceLogProbability(left, 2, 2);
+    assertEquals(score, -100.752754f, 0.0f);
+
+    float cost = ff.estimateFutureCost(null, currentState, null);
+    assertEquals(cost, score * WEIGHT, 0.0f);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
new file mode 100644
index 0000000..7752785
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.assertFalse;
+
+import static org.testng.Assert.assertEquals;
+
+public class LMBerkeleySentenceProbablityTest {
+
+  /**
+   * The sentence score returned by {@code LMGrammarBerkeley} must equal the sum of the n-gram
+   * log probabilities queried directly from the wrapped BerkeleyLM model.
+   */
+  @Test
+  public void verifySentenceLogProbability() {
+    LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+    grammar.registerWord("the", 2);
+    grammar.registerWord("chat-rooms", 3);
+    grammar.registerWord("<unk>", 0);
+
+    ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
+    float expected =
+        lm.getLogProb(new int[] {}, 0, 0)
+        + lm.getLogProb(new int[] {0}, 0, 1)
+        + lm.getLogProb(new int[] {0, 2}, 0, 2)
+        + lm.getLogProb(new int[] {2, 3}, 0, 2)
+        + lm.getLogProb(new int[] {3, 0}, 0, 2);
+
+    float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
+    // TestNG's assertEquals takes (actual, expected, delta).
+    assertEquals(result, expected, 0.0);
+  }
+
+  /** {@code "UNKNOWN_WORD"} must be reported as out of vocabulary, {@code "chat-rooms"} must not. */
+  @Test
+  public void givenUnknownWord_whenIsOov_thenCorrectlyDetected() {
+    LMGrammarBerkeley lm = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
+    assertTrue(lm.isOov(Vocabulary.id("UNKNOWN_WORD")));
+    assertFalse(lm.isOov(Vocabulary.id("chat-rooms")));
+  }
+
+  /** Clears the vocabulary after every test method. */
+  @AfterMethod
+  public void tearDown() {
+    Vocabulary.clear();
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
new file mode 100644
index 0000000..b0612d4
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Replacement for test/lm/berkeley/test.sh regression test
+ */
+
+public class LMGrammarBerkeleyTest {
+ private static final String INPUT = "the chat-rooms";
+ private static final String EXPECTED_OUTPUT = "tm_glue_0=2.000 lm_0=-7.153\n";
+ private static final String EXPECTED_OUTPUT_WITH_OOV = "tm_glue_0=2.000 lm_0=-7.153 lm_0_oov=0.000\n";
+ private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
+
+ private JoshuaConfiguration joshuaConfig;
+ private Decoder decoder;
+ /** Language model files that {@link #verifyLM(String)} is repeated for. */
+ @DataProvider(name = "languageModelFiles")
+ public Object[][] lmFiles() {
+ return new Object[][]{{"resources/berkeley_lm/lm"},
+ {"resources/berkeley_lm/lm.gz"},
+ {"resources/berkeley_lm/lm.berkeleylm"},
+ {"resources/berkeley_lm/lm.berkeleylm.gz"}};
+ }
+ /** Cleans up the decoder only if a test actually built one, then drops the reference. */
+ @AfterMethod
+ public void tearDown() throws Exception {
+ if (decoder != null) {
+ decoder.cleanUp();
+ decoder = null;
+ }
+ }
+ /** Decodes INPUT with the given LM file and checks the formatted feature output. */
+ @Test(dataProvider = "languageModelFiles")
+ public void verifyLM(String lmFile) {
+ joshuaConfig = new JoshuaConfiguration();
+ joshuaConfig.processCommandLineOptions(OPTIONS);
+ joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file " + lmFile);
+ decoder = new Decoder(joshuaConfig, null);
+ final String translation = decode(INPUT).toString();
+ assertEquals(translation, EXPECTED_OUTPUT);
+ }
+
+ private Translation decode(String input) {
+ final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ return decoder.decode(sentence);
+ }
+ /** With -oov_feature set, three dense features and an extra lm_0_oov entry are expected. */
+ @Test
+ public void givenLmWithOovFeature_whenDecoder_thenCorrectFeaturesReturned() {
+ joshuaConfig = new JoshuaConfiguration();
+ joshuaConfig.processCommandLineOptions(OPTIONS);
+ joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file resources/berkeley_lm/lm");
+ decoder = new Decoder(joshuaConfig, null);
+ final String translation = decode(INPUT).toString();
+ assertEquals(Decoder.weights.getDenseFeatures().size(), 3);
+ assertEquals(translation, EXPECTED_OUTPUT_WITH_OOV);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
new file mode 100644
index 0000000..5946abd
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.class_lm;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
+import org.apache.joshua.decoder.ff.tm.Rule;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * This unit test relies on KenLM. If the KenLM library is not found when the test is run all tests will be skipped.
+ */
+public class ClassBasedLanguageModelTest {
+
+ private static final float WEIGHT = 0.5f;
+
+ private LanguageModelFF ff;
+ /** Resets global decoder state and builds the KenLM class-based LM feature under test. */
+ @BeforeMethod
+ public void setUp() {
+ Decoder.resetGlobalState();
+ // Only the "lm_0" feature is given a weight.
+ final FeatureVector lmWeights = new FeatureVector();
+ lmWeights.set("lm_0", WEIGHT);
+ final String[] featureArgs = { "-lm_type", "kenlm", "-lm_order", "9",
+ "-lm_file", "./src/test/resources/lm/class_lm/class_lm_9gram.gz",
+ "-class_map", "./src/test/resources/lm/class_lm/class.map" };
+ // Construction goes through the KenLM guard because it needs the KenLM library.
+ final JoshuaConfiguration configuration = new JoshuaConfiguration();
+ KenLmTestUtil.Guard(() -> ff = new LanguageModelFF(lmWeights, featureArgs, configuration));
+ }
+ /** Resets global decoder state again once the test has run. */
+ @AfterMethod
+ public void tearDown() {
+ Decoder.resetGlobalState();
+ }
+ /** The feature built in setUp must report itself as a class LM and as stateful. */
+ @Test
+ public void givenLmDefinition_whenInitialized_thenInitializationIsCorrect() {
+ assertTrue(ff.isClassLM());
+ assertTrue(ff.isStateful());
+ }
+ /** The single target word "professionalism" must come back as the id whose word is "13". */
+ @Test
+ public void givenRuleWithSingleWord_whenGetRuleId_thenIsMappedToClass() {
+ final int[] targetIds = Vocabulary.addAll(new String[] { "professionalism" });
+ final Rule singleWordRule = new Rule(0, null, targetIds, new FeatureVector(), 0, OwnerMap.register(OwnerMap.UNKNOWN_OWNER));
+ assertEquals(Vocabulary.word(ff.getRuleIds(singleWordRule)[0]), "13");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
new file mode 100644
index 0000000..5d37a05
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassMapTest.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.class_lm;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.ff.lm.ClassMap;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+
+public class ClassMapTest {
+
+ private static final int EXPECTED_CLASS_MAP_SIZE = 5140;
+
+ /** Resets the decoder-wide state before each test. */
+ @BeforeMethod
+ public void setUp() {
+ Decoder.resetGlobalState();
+ }
+
+ /** Resets the decoder-wide state again once the test is done. */
+ @AfterMethod
+ public void tearDown() {
+ Decoder.resetGlobalState();
+ }
+
+ /** Loads the sample class map and spot-checks its size and two word-to-class entries. */
+ @Test
+ public void givenClassMapFile_whenClassMapRead_thenEntriesAreRead() {
+ // GIVEN / WHEN: the map is built straight from the resource file
+ final ClassMap loaded = new ClassMap("./src/test/resources/lm/class_lm/class.map");
+
+ // THEN: size first, then the individual lookups
+ assertEquals(loaded.size(), EXPECTED_CLASS_MAP_SIZE);
+ assertEquals(classWordOf(loaded, "professionalism"), "13");
+ assertEquals(classWordOf(loaded, "convenience"), "0");
+ }
+
+ /**
+ * Looks up the class id assigned to {@code word} and returns that id's vocabulary string.
+ */
+ private static String classWordOf(ClassMap map, String word) {
+ final int wordId = Vocabulary.id(word);
+ final int classId = map.getClassID(wordId);
+ return Vocabulary.word(classId);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
new file mode 100644
index 0000000..8d129e1
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
@@ -0,0 +1,39 @@
+package org.apache.joshua.decoder.ff.tm;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+public class OwnerMapTest {
+ /** Clears the owner registry so every test starts from the same state. */
+ @BeforeMethod
+ public void setUp() throws Exception {
+ OwnerMap.clear();
+ }
+
+ @AfterMethod
+ public void tearDown() throws Exception {
+ OwnerMap.clear();
+ }
+
+ /** Looking up an id that was never registered must raise IllegalArgumentException. */
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void given_invalidId_thenThrowsException() {
+ OwnerMap.getOwner(new OwnerId(3));
+ }
+
+ /** Registering the same owner twice returns one id, and that id maps back to the owner. */
+ @Test
+ public void givenOwner_whenRegisteringOwner_thenMappingIsCorrect() {
+ // GIVEN
+ String owner = "owner";
+ // WHEN
+ OwnerId id = OwnerMap.register(owner);
+ OwnerId id2 = OwnerMap.register(owner);
+
+ // THEN (TestNG argument order is actual, expected)
+ assertEquals(id2, id);
+ assertEquals(OwnerMap.getOwner(id), owner);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
new file mode 100644
index 0000000..88b2350
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the static detokenization helpers of {@link DeNormalize}.
+ */
+public class DeNormalizeTest {
+
+ private String tokenized;
+
+ /**
+ * Resets the shared tokenized fixture sentence before every test.
+ */
+ @BeforeMethod
+ protected void setUp() throws Exception {
+ tokenized = "my son 's friend , however , plays a high - risk game .";
+ }
+
+ /**
+ * Test method for {@link DeNormalize#processSingleLine(java.lang.String)}.
+ */
+ @Test(enabled = true)
+ public void testProcessSingleLine() {
+ tokenized =
+ "my son 's friend , ( dr . -rrb- robotnik , phd , however , wo n't play a high - risk game .";
+ String expected = "My son's friend, (Dr.) robotnik, PhD, however, won't play a high-risk game.";
+ String actual = DeNormalize.processSingleLine(tokenized);
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#processSingleLine(java.lang.String)}.
+ */
+ @Test
+ public void testProcessSingleLine_interspersed() {
+ tokenized = "phd mphil";
+ String expected = "PhD MPhil";
+ String actual = DeNormalize.processSingleLine(tokenized);
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for
+ * {@link DeNormalize#capitalizeLineFirstLetter(java.lang.String)}.
+ */
+ @Test
+ public void testCapitalizeLineFirstLetter() throws Exception {
+ String actual = DeNormalize.capitalizeLineFirstLetter(tokenized);
+ String expected = "My son 's friend , however , plays a high - risk game .";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for
+ * {@link DeNormalize#capitalizeLineFirstLetter(java.lang.String)} with empty input.
+ */
+ @Test
+ public void testCapitalizeLineFirstLetter_empty() throws Exception {
+ String actual = DeNormalize.capitalizeLineFirstLetter("");
+ String expected = "";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for
+ * {@link DeNormalize#capitalizeLineFirstLetter(java.lang.String)} with a lone digit.
+ */
+ @Test
+ public void testCapitalizeLineFirstLetter_singleNumberCharacter() throws Exception {
+ String actual = DeNormalize.capitalizeLineFirstLetter("1");
+ String expected = "1";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for
+ * {@link DeNormalize#capitalizeLineFirstLetter(java.lang.String)} with a lone letter.
+ */
+ @Test
+ public void testCapitalizeLineFirstLetter_singleLetterCharacter() throws Exception {
+ String actual = DeNormalize.capitalizeLineFirstLetter("a");
+ String expected = "A";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinPunctuationMarks(java.lang.String)}.
+ */
+ @Test
+ public void testJoinPunctuationMarks() throws Exception {
+ String actual = DeNormalize.joinPunctuationMarks(tokenized);
+ String expected = "my son 's friend, however, plays a high - risk game.";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinPunctuationMarks(java.lang.String)} with empty input.
+ */
+ @Test
+ public void testJoinPunctuationMarks_empty() throws Exception {
+ String actual = DeNormalize.joinPunctuationMarks("");
+ String expected = "";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinHyphen(java.lang.String)}.
+ */
+ @Test
+ public void testJoinHyphen() throws Exception {
+ String actual = DeNormalize.joinHyphen(tokenized);
+ String expected = "my son 's friend , however , plays a high-risk game .";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinHyphen(java.lang.String)} with empty input.
+ */
+ @Test
+ public void testJoinHypen_empty() throws Exception {
+ String actual = DeNormalize.joinHyphen("");
+ String expected = "";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinHyphen(java.lang.String)}: one space between hyphens.
+ */
+ @Test
+ public void testJoinHyphen_1space_btw_2hyphens() throws Exception {
+ String actual = DeNormalize.joinHyphen("a - - b");
+ String expected = "a-- b";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinHyphen(java.lang.String)}: two spaces between hyphens.
+ */
+ @Test
+ public void testJoinHyphen_2spaces_btw_2hyphens() throws Exception {
+ String actual = DeNormalize.joinHyphen("a -  - b");
+ String expected = "a--b";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinContractions(java.lang.String)}.
+ */
+ @Test
+ public void testJoinContractions() throws Exception {
+ tokenized = "my son 's friend , however , wo n't play a high - risk game .";
+ String actual = DeNormalize.joinContractions(tokenized);
+ String expected = "my son's friend , however , won't play a high - risk game .";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#joinContractions(java.lang.String)} with empty input.
+ */
+ @Test
+ public void testJoinContractions_empty() throws Exception {
+ String actual = DeNormalize.joinContractions("");
+ String expected = "";
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for
+ * {@link DeNormalize#capitalizeNameTitleAbbrvs(java.lang.String)}.
+ */
+ @Test
+ public void testCapitalizeNameTitleAbbrvs() throws Exception {
+ String actual, expected;
+ tokenized =
+ "my son 's friend , dr . robotnik , phd , however , wo n't play a high - risk game .";
+ expected =
+ "my son 's friend , Dr . robotnik , PhD , however , wo n't play a high - risk game .";
+ actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "mr mrs ms miss dr prof";
+ expected = "Mr Mrs Ms Miss Dr Prof";
+ actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#capitalizeI(java.lang.String)}.
+ */
+ @Test
+ public void testCapitalizeI() throws Exception {
+ String expected, actual;
+
+ tokenized = "sam i am";
+ expected = "sam I am";
+ actual = DeNormalize.capitalizeI(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "sam iam";
+ expected = "sam iam";
+ actual = DeNormalize.capitalizeI(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "sami am";
+ expected = "sami am";
+ actual = DeNormalize.capitalizeI(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "samiam";
+ expected = "samiam";
+ actual = DeNormalize.capitalizeI(tokenized);
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test method for {@link DeNormalize#replaceBracketTokens(java.lang.String)}.
+ */
+ @Test
+ public void testReplaceBracketTokens() throws Exception {
+ String expected, actual;
+
+ tokenized = "-lrb- i -rrb-";
+ expected = "( i )";
+ actual = DeNormalize.replaceBracketTokens(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "-LRB- i -RRB-";
+ expected = "( i )";
+ actual = DeNormalize.replaceBracketTokens(tokenized);
+ assertEquals(actual, expected);
+ }
+
+ /**
+ * Test for bracket handling in {@link DeNormalize#joinPunctuationMarks(java.lang.String)}.
+ */
+ @Test
+ public void testDetokenizeBracketTokens() throws Exception {
+ String expected, actual;
+
+ tokenized = "( i )";
+ expected = "(i)";
+ actual = DeNormalize.joinPunctuationMarks(tokenized);
+ assertEquals(actual, expected);
+
+ tokenized = "[ i } j";
+ expected = "[i} j";
+ actual = DeNormalize.joinPunctuationMarks(tokenized);
+ assertEquals(actual, expected);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
new file mode 100644
index 0000000..a09aebb
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestStreamTest.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import static org.mockito.Mockito.mock;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
+
+/**
+ * This class verifies the following behaviors:
+ * <p>
+ * - A blank input, i.e. "", does not cause a translation to be created.
+ * <p>
+ * - A non-blank input that is not followed by a newline, e.g. "1", causes a translation to be
+ * created.
+ * <p>
+ * - An input that contains whitespace or nothing followed by a newline causes a translation to be
+ * created, with "" as the source.
+ */
+
+public class TranslationRequestStreamTest {
+
+ private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+
+ @BeforeMethod
+ public void createTranslationRequest() throws Exception {
+ }
+
+ /**
+ * Currently a no-op: the method body is empty.
+ */
+ @BeforeMethod
+ protected void setUp() {
+ }
+
+ /**
+ * Currently a no-op: the method body is empty.
+ */
+ @AfterMethod
+ protected void tearDown() throws Exception {
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#TranslationRequestStream(BufferedReader, JoshuaConfiguration)}.
+ */
+ @Test(enabled = false)
+ public void testTranslationRequest() {
+ fail("Not yet implemented");
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}: 0 before any call to next().
+ */
+ @Test
+ public void testSize_uponConstruction() {
+ InputStream in = mock(InputStream.class);
+ TranslationRequestStream request = new TranslationRequestStream(
+ new BufferedReader(new InputStreamReader(in, Charset.defaultCharset())), joshuaConfiguration);
+ assertEquals(request.size(), 0);
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}: input "1" without a newline.
+ *
+ * @throws Exception propagated unchanged from the stream under test
+ */
+ @Test
+ public void testSize_1() throws Exception {
+ byte[] data = "1".getBytes();
+ ByteArrayInputStream input = new ByteArrayInputStream(data);
+ TranslationRequestStream request = new TranslationRequestStream(
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ request.next();
+ assertEquals(request.size(), 1);
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}: input is a single newline.
+ *
+ * @throws Exception propagated unchanged from the stream under test
+ */
+ @Test
+ public void testSize_newline() throws Exception {
+ byte[] data = "\n".getBytes();
+ ByteArrayInputStream input = new ByteArrayInputStream(data);
+ TranslationRequestStream request = new TranslationRequestStream(
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ request.next();
+ assertEquals(request.size(), 1);
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#size()}: input is two newlines.
+ *
+ * @throws Exception propagated unchanged from the stream under test
+ */
+ @Test
+ public void testSize_2newlines() throws Exception {
+ byte[] data = "\n\n".getBytes();
+ ByteArrayInputStream input = new ByteArrayInputStream(data);
+ TranslationRequestStream request = new TranslationRequestStream(
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ request.next();
+ request.next();
+ assertEquals(request.size(), 2);
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#next()}: each newline yields "" as source.
+ *
+ * @throws Exception propagated unchanged from the stream under test
+ */
+ @Test
+ public void testNext_2Newlines() throws Exception {
+ byte[] data = "\n\n".getBytes();
+ ByteArrayInputStream input = new ByteArrayInputStream(data);
+ TranslationRequestStream request = new TranslationRequestStream(
+ new BufferedReader(new InputStreamReader(input, Charset.defaultCharset())), joshuaConfiguration);
+ assertEquals(request.next().source(), "");
+ assertEquals(request.next().source(), "");
+ }
+
+ /**
+ * Test method for {@link org.apache.joshua.decoder.io.TranslationRequestStream#remove()}.
+ */
+ @Test(enabled = false)
+ public void testRemove() {
+ fail("Not yet implemented");
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
new file mode 100644
index 0000000..5b9db06
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.kbest_extraction;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Reimplements the kbest extraction regression test
+ * TODO (fhieber): this test strangely only works with StateMinimizing KenLM.
+ * This is to be investigated
+ */
+
+public class KBestExtractionTest {
+ private static final String CONFIG = "resources/kbest_extraction/joshua.config";
+ private static final String INPUT = "a b c d e";
+ private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
+
+ private JoshuaConfiguration joshuaConfig = null;
+ private Decoder decoder = null;
+
+ /** Builds a decoder from the config file, overriding only the output format. */
+ @BeforeMethod
+ public void setUp() throws Exception {
+ joshuaConfig = new JoshuaConfiguration();
+ joshuaConfig.readConfigFile(CONFIG);
+ joshuaConfig.outputFormat = "%i ||| %s ||| %c";
+ KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+ }
+
+ @AfterMethod
+ public void tearDown() throws Exception {
+ decoder.cleanUp();
+ decoder = null;
+ }
+
+ /** The decoder output for INPUT must equal the contents of the gold file. */
+ @Test
+ public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
+ final String translation = decode(INPUT).toString();
+ final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+ assertEquals(translation, gold); // TestNG order: (actual, expected)
+ }
+
+ private Translation decode(String input) {
+ final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+ return decoder.decode(sentence);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
new file mode 100644
index 0000000..c6512da
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.phrase;
+
+import org.testng.annotations.Test;
+
+import java.util.BitSet;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+import static org.testng.AssertJUnit.assertFalse;
+
+public class CoverageTest {
+
+ @Test
+ public void testSet() {
+ final Coverage coverage = new Coverage();
+ coverage.set(1, 2);
+ coverage.set(3, 4);
+ coverage.set(2, 3);
+ coverage.set(0, 1);
+ // Spans 0-1 and 0-5 are expected to clash with what was set; 4-6 is expected to be accepted.
+ assertFalse(coverage.compatible(0, 1));
+ assertFalse(coverage.compatible(0, 5));
+ assertTrue(coverage.compatible(4, 6));
+
+ assertEquals(coverage.toString(), "4 ..........");
+ }
+
+ @Test
+ public void testPattern() {
+ final BitSet wanted = new BitSet();
+ wanted.set(0);
+ final Coverage coverage = new Coverage();
+ coverage.set(5, 6);
+ coverage.set(0, 4);
+ final BitSet observed = coverage.pattern(4, 5);
+ assertEquals(observed, wanted);
+ }
+
+ @Test
+ public void testCopyConstructor() {
+ final Coverage original = new Coverage();
+ original.set(2, 3);
+ final Coverage duplicate = new Coverage(original);
+ duplicate.set(4, 5);
+ // Only the copy received the second span, so the two renderings are expected to differ.
+ assertFalse(original.toString().equals(duplicate.toString()));
+ }
+
+ @Test
+ public void testCompatible() {
+ final Coverage coverage = new Coverage();
+ coverage.set(10, 14);
+ // With only 10-14 set: the first four spans are expected to pass, the next five to fail.
+ assertTrue(coverage.compatible(14, 16));
+ assertTrue(coverage.compatible(6, 10));
+ assertTrue(coverage.compatible(1, 10));
+ assertTrue(coverage.compatible(1, 9));
+ assertFalse(coverage.compatible(9, 11));
+ assertFalse(coverage.compatible(13, 15));
+ assertFalse(coverage.compatible(9, 15));
+ assertFalse(coverage.compatible(9, 14));
+ assertFalse(coverage.compatible(10, 15));
+
+ coverage.set(0, 9);
+ // After also setting 0-9, only the span 9-10 and spans starting at 14 or later should pass.
+ for (int width = 1; width <= 3; width++) {
+ for (int start = 0; start < 20; start++) {
+ final int end = start + width;
+ final boolean expectedCompatible = (start == 9 && end == 10) || start >= 14;
+ if (expectedCompatible) {
+ assertTrue(coverage.compatible(start, end));
+ } else {
+ assertFalse(coverage.compatible(start, end));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testFirstZero() {
+ final Coverage coverage = new Coverage();
+ coverage.set(2, 5);
+ assertEquals(coverage.firstZero(), 0);
+ coverage.set(8, 10);
+ assertEquals(coverage.firstZero(), 0);
+ coverage.set(0, 2);
+ assertEquals(coverage.firstZero(), 5);
+ coverage.set(5, 7);
+ assertEquals(coverage.firstZero(), 7);
+ coverage.set(7, 8);
+ assertEquals(coverage.firstZero(), 10);
+ }
+
+ @Test
+ public void testOpenings() {
+ final Coverage coverage = new Coverage();
+ coverage.set(0, 2);
+ coverage.set(8, 10);
+ // Every position from 2 through 6 is expected to report the same openings.
+ for (int position = 2; position < 7; position++) {
+ assertEquals(coverage.leftOpening(position), 2);
+ assertEquals(coverage.rightOpening(position, 17), 8);
+ assertEquals(coverage.rightOpening(position, 7), 7);
+ }
+ }
+
+ @Test
+ public void testEquals() {
+ final Coverage oneSpan = new Coverage();
+ oneSpan.set(9, 11);
+ final Coverage twoSpans = new Coverage();
+ twoSpans.set(9, 10);
+ twoSpans.set(10, 11);
+ assertEquals(oneSpan, twoSpans);
+ }
+
+ @Test
+ public void testToString() {
+ final Coverage coverage = new Coverage();
+ coverage.set(0, 40);
+ coverage.set(44, 49);
+ assertEquals(coverage.toString(), "40 ....xxxxx.");
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
new file mode 100644
index 0000000..8a68ab7
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.constrained;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Regression test for constrained phrase decoding: decodes {@code INPUT}, which carries a
+ * target side after "|||", and compares the result with a stored gold file. Currently disabled.
+ */
+public class ConstrainedPhraseDecodingTest {
+
+  private static final String CONFIG = "resources/phrase_decoder/constrained.config";
+  // The accented "o" is a Unicode escape so the literal does not depend on the file encoding.
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelecci\u00f3n de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  @BeforeMethod
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  @Test(enabled = false)
+  public void givenInput_whenConstrainedPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(translation, gold); // TestNG order: actual first, expected second
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
new file mode 100644
index 0000000..f2fc6a7
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.decode;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.testng.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Regression test for phrase decoding against a stored gold file. Currently disabled.
+ */
+public class PhraseDecodingTest {
+
+  private static final String CONFIG = "resources/phrase_decoder/config";
+  // The accented "o" is a Unicode escape so the literal does not depend on the file encoding.
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelecci\u00f3n de Obama";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  @BeforeMethod
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  @Test(enabled = false)
+  public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(translation, gold); // TestNG order: actual first, expected second
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
new file mode 100644
index 0000000..3b2852c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.testng.annotations.Test;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+/** Tests {@link Sentence} on a source side made of {@code maxlen} concatenated "." characters. */
+public class AlmostTooLongSentenceTest {
+  private JoshuaConfiguration joshuaConfiguration;
+  private String almostTooLongInput;
+  private Sentence sentencePlusTarget;
+
+  @BeforeMethod
+  public void setUp() {
+    joshuaConfiguration = new JoshuaConfiguration();
+    almostTooLongInput = concatStrings(".", joshuaConfiguration.maxlen);
+    sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0, joshuaConfiguration);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Returns {@code repeatedToken} concatenated {@code repeatedTimes} times, with no separator.
+   *
+   * @param repeatedToken the text to repeat
+   * @param repeatedTimes how many copies to append; zero or less yields the empty string
+   * @return the concatenation of all copies
+   */
+  private String concatStrings(String repeatedToken, int repeatedTimes) {
+    StringBuilder result = new StringBuilder(); // avoids quadratic copying of String +=
+    for (int i = 0; i < repeatedTimes; i++) {
+      result.append(repeatedToken);
+    }
+    return result.toString();
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyNotEmpty() {
+    assertFalse(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyTargetNull() {
+    assertNull(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).target);
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetIsNotEmpty() {
+    assertFalse(this.sentencePlusTarget.isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetNull() {
+    assertEquals(this.sentencePlusTarget.target, "target side"); // name aside, target is set
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
new file mode 100644
index 0000000..8e0d171
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+/** Tests {@link Sentence} construction, emptiness checks, and handling of over-long input. */
+public class SentenceTest {
+  private String tooLongInput;
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+
+  @BeforeMethod
+  public void setUp() {
+    tooLongInput = concatTokens("*", joshuaConfiguration.maxlen * 2);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Returns {@code repeatedToken} repeated {@code repeatedTimes} times, joined by single spaces.
+   * The token is always emitted at least once, even when {@code repeatedTimes} is below one.
+   *
+   * @param repeatedToken the token to repeat
+   * @param repeatedTimes the number of tokens wanted
+   * @return the space-separated tokens, without a trailing space
+   */
+  private String concatTokens(String repeatedToken, int repeatedTimes) {
+    StringBuilder result = new StringBuilder(); // avoids quadratic copying of String +=
+    for (int i = 0; i < repeatedTimes - 1; i++) {
+      result.append(repeatedToken).append(' ');
+    }
+    result.append(repeatedToken);
+    return result.toString();
+  }
+
+  /**
+   * The over-long input ({@code maxlen * 2} tokens) should come back with a length of 202.
+   * TODO is this a bug? maxlen is defined as 200, not 202. Presumably length() also counts two
+   * sentence-boundary markers -- confirm against Sentence.
+   */
+  @Test
+  public void testTooManyTokensSourceTruncated() {
+    assertEquals(new Sentence(this.tooLongInput, 0, joshuaConfiguration).length(), 202);
+  }
+
+  @Test
+  public void testTooManyTokensSourceOnlyNotNull() {
+    assertNotNull(new Sentence(this.tooLongInput, 0, joshuaConfiguration));
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetIsEmpty() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.target, "");
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetTruncated() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.length(), 202);
+  }
+
+  @Test
+  public void testClearlyNotTooManyTokens() {
+    // A single short token is far below the length limit and must survive as non-empty.
+    String input = "token";
+    assertFalse(new Sentence(input, 0, joshuaConfiguration).isEmpty());
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5735d9ae/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
new file mode 100644
index 0000000..1ad020c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Arc class.
+ *
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice_arc" })
+public class ArcTest {
+
+  // Fixture values: passed to the Arc constructor in constructArc() and then compared
+  // against what the getters return.
+  private final Node<String> head = new Node<>(1);
+  private final Node<String> tail = new Node<>(2);
+  private final float cost = (float) Math.PI;
+  private final String label = "pi";
+
+  // Assigned by constructArc(); every getter test below declares a dependency on that
+  // method, so it is expected to be set before they read it.
+  private Arc<String> arc;
+
+  /**
+   * Builds the shared arc and checks every getter once. Declared to run after
+   * NodeTest.constructNode (a dependsOnGroups = "lattice_node" declaration would be the
+   * group-based alternative).
+   */
+  @Test(dependsOnMethods = { "org.apache.joshua.lattice.NodeTest.constructNode" })
+  public void constructArc() {
+    arc = new Arc<>(tail, head, cost, label);
+
+    // Every getter must hand back exactly what the constructor received.
+    Assert.assertEquals(arc.getHead(), head);
+    Assert.assertEquals(arc.getTail(), tail);
+    Assert.assertEquals(arc.getCost(), cost);
+    Assert.assertEquals(arc.getLabel(), label);
+  }
+
+  /** The head returned is the node passed as the constructor's second argument. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getHead() {
+    Assert.assertEquals(arc.getHead(), head);
+  }
+
+  /** The tail returned is the node passed as the constructor's first argument. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getTail() {
+    Assert.assertEquals(arc.getTail(), tail);
+  }
+
+  /** The cost returned equals the float given at construction. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getCost() {
+    Assert.assertEquals(arc.getCost(), cost);
+  }
+
+  /** The label returned equals the label given at construction. */
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getLabel() {
+    Assert.assertEquals(arc.getLabel(), label);
+  }
+}