You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:47:29 UTC
[1/2] incubator-joshua git commit: JOSHUA-299 Move regression tests
to proper unit tests
Repository: incubator-joshua
Updated Branches:
refs/heads/master aecc0b088 -> 1c76867aa
JOSHUA-299 Move regression tests to proper unit tests
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5dd80a37
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5dd80a37
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5dd80a37
Branch: refs/heads/master
Commit: 5dd80a37217f4f61ca00aedd256947a412e13771
Parents: 9c6ae40
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Sep 21 07:53:21 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Sep 21 07:53:21 2016 -0700
----------------------------------------------------------------------
pom.xml | 12 ++
.../org/apache/joshua/decoder/Translation.java | 1 -
.../joshua/decoder/segment_file/Sentence.java | 4 -
.../apache/joshua/decoder/TestTranslation.java | 140 +++++++++++++++++++
.../org/apache/joshua/decoder/package-info.java | 22 +++
.../org/apache/joshua/packed/VocabTest.java | 58 --------
src/test/java/org/apache/joshua/packed/test.sh | 20 ---
.../decoder/moses-compat/output.expected | 8 +-
8 files changed, 176 insertions(+), 89 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index feb676e..985232f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -150,6 +150,15 @@
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>assembly</id>
+ <phase>package</phase>
+ <goals>
+ <goal>single</goal>
+ </goals>
+ </execution>
+ </executions>
<configuration>
<archive>
<manifest>
@@ -159,6 +168,9 @@
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
+ <attach>true</attach>
+ <skipAssembly>false</skipAssembly>
+ <tarLongFileMode>gnu</tarLongFileMode>
</configuration>
</plugin>
<plugin>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index ff2aed0..2327ff2 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -32,7 +32,6 @@ import java.util.List;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.lm.StateMinimizingLanguageModel;
import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.hypergraph.KBestExtractor;
import org.apache.joshua.decoder.io.DeNormalize;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index f84c41a..1d8712d 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -21,20 +21,16 @@ package org.apache.joshua.decoder.segment_file;
import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
-import java.util.Map;
import java.util.StringTokenizer;
-import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
import org.apache.joshua.decoder.LanguageModelStateManager;
import org.apache.joshua.decoder.ff.tm.Grammar;
import org.apache.joshua.lattice.Arc;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/decoder/TestTranslation.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/TestTranslation.java b/src/test/java/org/apache/joshua/decoder/TestTranslation.java
new file mode 100644
index 0000000..bf4eaab
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/TestTranslation.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.testng.annotations.BeforeTest;
+import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+
+/**
+ * Tests should
+ * <ol>
+ * <li>Should write translation (using -moses argument) to stdout, output-format info to n-best.txt</li>
+ * <li>Should write output-format info to n-best.txt (since no -moses)</li>
+ * </ol>
+ * We then undertake a simple diff on the outputs to see if Moses compatibility is achieved.
+ */
+public class TestTranslation {
+
+ private static final String[] MOSES_INPUT = {"-v", "0", "-moses", "-n-best-list", "n-best1.txt", "10", "distinct", ">", "output"};
+ private static final String[] STANDARD_INPUT = {"-v", "0", "-n-best-list", "n-best2.txt", "10", "distinct", ">>", "output"};
+ private JoshuaConfiguration mosesConfig;
+ private JoshuaConfiguration standardConfig;
+ private Path tmpFile;
+
+ @BeforeTest
+ public void beforeTest() {
+ mosesConfig = new JoshuaConfiguration();
+ standardConfig = new JoshuaConfiguration();
+ }
+
+ @AfterTest
+ public void afterTest() {
+ }
+
+ /**
+ * Should write translation to stdout, output-format info to n-best.txt
+ */
+ @Test
+ public void testMosesTranslationCompatibility() {
+
+ //First execute the MOSES_INPUT
+ mosesConfig.processCommandLineOptions(MOSES_INPUT);
+ mosesConfig.use_structured_output = true;
+ Decoder mosesDecoder = new Decoder(mosesConfig, null);
+ Translation mosesTranslations = mosesDecoder.decode(new Sentence("help", 1, mosesConfig));
+ getStructuredTranslations(tmpFile, mosesTranslations);
+
+ //Second execute the STANDARD_INPUT
+ standardConfig.processCommandLineOptions(STANDARD_INPUT);
+ standardConfig.use_structured_output = true;
+ Decoder standardDecoder = new Decoder(standardConfig, null);
+ Translation standardTranslations = standardDecoder.decode(new Sentence("help", 2, standardConfig));
+ getStructuredTranslations(tmpFile, standardTranslations);
+
+ File expectedFile = new File(TestTranslation.class.getClassLoader().getResource("decoder/moses-compat/output.expected").getFile());
+
+ try {
+ compareFileContents(tmpFile, expectedFile);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void compareFileContents(Path newFile, File expectedFile) throws IOException {
+
+ BufferedReader reader1 = new BufferedReader(new FileReader(new File(newFile.toFile().getPath())));
+ BufferedReader reader2 = new BufferedReader(new FileReader(new File(expectedFile.getPath())));
+
+ String line1 = null;
+ String line2 = null;
+ while (((line1 = reader1.readLine()) != null)
+ && ((line2 = reader2.readLine()) != null)) {
+ if (line1.equals(line2)) {
+ Assert.assertTrue(line1.equals(line2), "Contents (each line) of input files should be identical.");
+ } else {
+ Assert.fail("Contents of input files is not identical.");
+ }
+ }
+ reader1.close();
+ reader2.close();
+ }
+
+ private void getStructuredTranslations(Path tmpFile, Translation translations) {
+ for (StructuredTranslation sTranslation : translations.getStructuredTranslations()) {
+ try {
+ tmpFile = writeStructuredTranslationString(sTranslation);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ private Path writeStructuredTranslationString(StructuredTranslation sTranslation) throws IOException{
+ if (tmpFile==null) {
+ try {
+ tmpFile = java.nio.file.Files.createTempFile("output", null);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ byte[] bTranslation = (Integer.toString(sTranslation.getTranslationWordAlignments().get(0).get(0)) +
+ " ||| " + sTranslation.getTranslationString() +
+ " ||| " + sTranslation.getTranslationFeatures().entrySet().iterator().next().toString() +
+ " ||| " + Float.toString(sTranslation.getTranslationScore()) + "\n").getBytes(Charset.forName("UTF-8"));
+
+ FileOutputStream fos = new FileOutputStream(tmpFile.toFile(), true);
+ fos.write(bTranslation);
+ fos.close();
+ return tmpFile;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/decoder/package-info.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/package-info.java b/src/test/java/org/apache/joshua/decoder/package-info.java
new file mode 100644
index 0000000..5ab1fe3
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Tests for org.apache.joshua.decoder package.
+ */
+package org.apache.joshua.decoder;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/packed/VocabTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/packed/VocabTest.java b/src/test/java/org/apache/joshua/packed/VocabTest.java
deleted file mode 100644
index 523df4c..0000000
--- a/src/test/java/org/apache/joshua/packed/VocabTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.joshua.corpus.Vocabulary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class VocabTest {
-
- private static final Logger LOG = LoggerFactory.getLogger(VocabTest.class);
-
- //FIXME: no main() in automated test case,
- public static void main(String args[]) {
-
- int numWords = 0;
- try {
- String dir = args[0];
-
- boolean read = Vocabulary.read(new File(dir + "/vocabulary"));
- if (! read) {
- System.err.println("VocabTest: Failed to read the vocabulary.");
- System.exit(1);
- }
-
- int id = 0;
- while (Vocabulary.hasId(id)) {
- String word = Vocabulary.word(id);
- System.out.println(String.format("VOCAB: %d\t%s", id, word));
- numWords++;
- id++;
- }
- } catch (IOException e) {
- LOG.error(e.getMessage(), e);
- }
-
- System.out.println("read " + numWords + " words");
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/packed/test.sh
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/packed/test.sh b/src/test/java/org/apache/joshua/packed/test.sh
deleted file mode 100644
index be6cf27..0000000
--- a/src/test/java/org/apache/joshua/packed/test.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# test the vocabulary
-# javac VocabTest.java
-# java -cp .:${JOSHUA}/bin VocabTest small_packed
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/resources/decoder/moses-compat/output.expected
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/moses-compat/output.expected b/src/test/resources/decoder/moses-compat/output.expected
index 5fb08bf..b966f9a 100644
--- a/src/test/resources/decoder/moses-compat/output.expected
+++ b/src/test/resources/decoder/moses-compat/output.expected
@@ -1,6 +1,2 @@
-help
-0 ||| help ||| tm_glue_0=1.000 ||| 0.000
-help
-
-# n-best stuff to follow:
-0 ||| help ||| tm-glue-0= 1.000 ||| 0.000
+0 ||| help ||| tm_glue_0=1.0 ||| 0.0
+0 ||| help ||| tm_glue_0=1.0 ||| 0.0
[2/2] incubator-joshua git commit: Merge branch 'JOSHUA-299' of
github.com:lewismc/incubator-joshua
Posted by mj...@apache.org.
Merge branch 'JOSHUA-299' of github.com:lewismc/incubator-joshua
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1c76867a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1c76867a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1c76867a
Branch: refs/heads/master
Commit: 1c76867aa8a04763bd7c9ebd970fc27b4227c99c
Parents: aecc0b0 5dd80a3
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Sep 28 08:47:21 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Sep 28 08:47:21 2016 -0400
----------------------------------------------------------------------
pom.xml | 12 ++
.../org/apache/joshua/decoder/Translation.java | 1 -
.../apache/joshua/decoder/TestTranslation.java | 140 +++++++++++++++++++
.../org/apache/joshua/decoder/package-info.java | 22 +++
.../org/apache/joshua/packed/VocabTest.java | 58 --------
src/test/java/org/apache/joshua/packed/test.sh | 20 ---
.../decoder/moses-compat/output.expected | 8 +-
7 files changed, 176 insertions(+), 85 deletions(-)
----------------------------------------------------------------------