You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/28 12:47:29 UTC

[1/2] incubator-joshua git commit: JOSHUA-299 Move regression tests to proper unit tests

Repository: incubator-joshua
Updated Branches:
  refs/heads/master aecc0b088 -> 1c76867aa


JOSHUA-299 Move regression tests to proper unit tests


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5dd80a37
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5dd80a37
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5dd80a37

Branch: refs/heads/master
Commit: 5dd80a37217f4f61ca00aedd256947a412e13771
Parents: 9c6ae40
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Sep 21 07:53:21 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Sep 21 07:53:21 2016 -0700

----------------------------------------------------------------------
 pom.xml                                         |  12 ++
 .../org/apache/joshua/decoder/Translation.java  |   1 -
 .../joshua/decoder/segment_file/Sentence.java   |   4 -
 .../apache/joshua/decoder/TestTranslation.java  | 140 +++++++++++++++++++
 .../org/apache/joshua/decoder/package-info.java |  22 +++
 .../org/apache/joshua/packed/VocabTest.java     |  58 --------
 src/test/java/org/apache/joshua/packed/test.sh  |  20 ---
 .../decoder/moses-compat/output.expected        |   8 +-
 8 files changed, 176 insertions(+), 89 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index feb676e..985232f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -150,6 +150,15 @@
       </plugin>
       <plugin>
         <artifactId>maven-assembly-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>assembly</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
         <configuration>
           <archive>
             <manifest>
@@ -159,6 +168,9 @@
           <descriptorRefs>
             <descriptorRef>jar-with-dependencies</descriptorRef>
           </descriptorRefs>
+          <attach>true</attach>
+          <skipAssembly>false</skipAssembly>
+          <tarLongFileMode>gnu</tarLongFileMode>
         </configuration>
       </plugin>
       <plugin>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index ff2aed0..2327ff2 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -32,7 +32,6 @@ import java.util.List;
 
 import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.lm.StateMinimizingLanguageModel;
 import org.apache.joshua.decoder.hypergraph.HyperGraph;
 import org.apache.joshua.decoder.hypergraph.KBestExtractor;
 import org.apache.joshua.decoder.io.DeNormalize;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index f84c41a..1d8712d 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -21,20 +21,16 @@ package org.apache.joshua.decoder.segment_file;
 import static org.apache.joshua.util.FormatUtils.addSentenceMarkers;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import java.util.StringTokenizer;
-import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.KenLMPool;
 import org.apache.joshua.decoder.LanguageModelStateManager;
 import org.apache.joshua.decoder.ff.tm.Grammar;
 import org.apache.joshua.lattice.Arc;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/decoder/TestTranslation.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/TestTranslation.java b/src/test/java/org/apache/joshua/decoder/TestTranslation.java
new file mode 100644
index 0000000..bf4eaab
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/TestTranslation.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.testng.annotations.BeforeTest;
+import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+
+/**
+ * The tests exercise two decoder configurations:
+ * <ol>
+ * <li>With -moses: should write the translation to stdout and output-format info to n-best1.txt</li>
+ * <li>Without -moses: should write output-format info to n-best2.txt</li>
+ * </ol>
+ * We then undertake a simple diff on the outputs to see if Moses compatibility is achieved.
+ */
+public class TestTranslation {
+
+  private static final String[] MOSES_INPUT = {"-v", "0", "-moses", "-n-best-list", "n-best1.txt", "10", "distinct", ">", "output"};
+  private static final String[] STANDARD_INPUT = {"-v", "0", "-n-best-list", "n-best2.txt", "10", "distinct", ">>", "output"};
+  private JoshuaConfiguration mosesConfig;
+  private JoshuaConfiguration standardConfig;
+  private Path tmpFile;
+
+  @BeforeTest
+  public void beforeTest() {
+    mosesConfig = new JoshuaConfiguration();
+    standardConfig = new JoshuaConfiguration();
+  }
+
+  @AfterTest
+  public void afterTest() {
+  }
+
+  /**
+   * Decodes "help" with and without -moses and compares the structured translations to output.expected
+   */
+  @Test
+  public void testMosesTranslationCompatibility() {
+
+    //First execute the MOSES_INPUT
+    mosesConfig.processCommandLineOptions(MOSES_INPUT);
+    mosesConfig.use_structured_output = true;
+    Decoder mosesDecoder = new Decoder(mosesConfig, null);
+    Translation mosesTranslations = mosesDecoder.decode(new Sentence("help", 1, mosesConfig));
+    getStructuredTranslations(tmpFile, mosesTranslations);
+
+    //Second execute the STANDARD_INPUT
+    standardConfig.processCommandLineOptions(STANDARD_INPUT);
+    standardConfig.use_structured_output = true;
+    Decoder standardDecoder = new Decoder(standardConfig, null);
+    Translation standardTranslations = standardDecoder.decode(new Sentence("help", 2, standardConfig));
+    getStructuredTranslations(tmpFile, standardTranslations);
+
+    File expectedFile = new File(TestTranslation.class.getClassLoader().getResource("decoder/moses-compat/output.expected").getFile());
+
+    try {
+      compareFileContents(tmpFile, expectedFile);
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+
+  private void compareFileContents(Path newFile, File expectedFile) throws IOException {
+
+    BufferedReader reader1 = new BufferedReader(new FileReader(new File(newFile.toFile().getPath())));
+    BufferedReader reader2 = new BufferedReader(new FileReader(new File(expectedFile.getPath())));
+
+    String line1 = null;
+    String line2 = null;
+    while (((line1 = reader1.readLine()) != null)
+        && ((line2 = reader2.readLine()) != null)) {
+      if (line1.equals(line2)) {
+        Assert.assertTrue(line1.equals(line2), "Contents (each line) of input files should be identical.");
+      } else {
+        Assert.fail("Contents of input files is not identical.");
+      }
+    }
+    reader1.close();
+    reader2.close();
+  }
+
+  private void getStructuredTranslations(Path tmpFile, Translation translations) {
+    for (StructuredTranslation sTranslation : translations.getStructuredTranslations()) {
+      try {
+        tmpFile = writeStructuredTranslationString(sTranslation);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+  }
+
+  private Path writeStructuredTranslationString(StructuredTranslation sTranslation) throws IOException{
+    if (tmpFile==null) {
+      try {
+        tmpFile = java.nio.file.Files.createTempFile("output", null);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+
+    byte[] bTranslation = (Integer.toString(sTranslation.getTranslationWordAlignments().get(0).get(0)) + 
+        " ||| " + sTranslation.getTranslationString() + 
+        " ||| " + sTranslation.getTranslationFeatures().entrySet().iterator().next().toString() + 
+        " ||| " + Float.toString(sTranslation.getTranslationScore()) + "\n").getBytes(Charset.forName("UTF-8"));
+
+    FileOutputStream fos = new FileOutputStream(tmpFile.toFile(), true);
+    fos.write(bTranslation);
+    fos.close();
+    return tmpFile;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/decoder/package-info.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/package-info.java b/src/test/java/org/apache/joshua/decoder/package-info.java
new file mode 100644
index 0000000..5ab1fe3
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ * Tests for org.apache.joshua.decoder package.
+ */
+package org.apache.joshua.decoder;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/packed/VocabTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/packed/VocabTest.java b/src/test/java/org/apache/joshua/packed/VocabTest.java
deleted file mode 100644
index 523df4c..0000000
--- a/src/test/java/org/apache/joshua/packed/VocabTest.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.packed;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.joshua.corpus.Vocabulary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class VocabTest {
-
-  private static final Logger LOG = LoggerFactory.getLogger(VocabTest.class);
-
-  //FIXME: no main() in automated test case,
-  public static void main(String args[]) {
-
-    int numWords = 0;
-    try {
-      String dir = args[0];
-
-      boolean read = Vocabulary.read(new File(dir + "/vocabulary"));
-      if (! read) {
-        System.err.println("VocabTest: Failed to read the vocabulary.");
-        System.exit(1);
-      }
-
-      int id = 0;
-      while (Vocabulary.hasId(id)) {
-        String word = Vocabulary.word(id);
-        System.out.println(String.format("VOCAB: %d\t%s", id, word));
-        numWords++;
-        id++;
-      }
-    } catch (IOException e) {
-      LOG.error(e.getMessage(), e);
-    }
-
-    System.out.println("read " + numWords + " words");
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/java/org/apache/joshua/packed/test.sh
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/packed/test.sh b/src/test/java/org/apache/joshua/packed/test.sh
deleted file mode 100644
index be6cf27..0000000
--- a/src/test/java/org/apache/joshua/packed/test.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# test the vocabulary
-# javac VocabTest.java
-# java -cp .:${JOSHUA}/bin VocabTest small_packed

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5dd80a37/src/test/resources/decoder/moses-compat/output.expected
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/moses-compat/output.expected b/src/test/resources/decoder/moses-compat/output.expected
index 5fb08bf..b966f9a 100644
--- a/src/test/resources/decoder/moses-compat/output.expected
+++ b/src/test/resources/decoder/moses-compat/output.expected
@@ -1,6 +1,2 @@
-help
-0 ||| help ||| tm_glue_0=1.000 ||| 0.000
-help
-
-# n-best stuff to follow:
-0 ||| help ||| tm-glue-0= 1.000 ||| 0.000
+0 ||| help ||| tm_glue_0=1.0 ||| 0.0
+0 ||| help ||| tm_glue_0=1.0 ||| 0.0


[2/2] incubator-joshua git commit: Merge branch 'JOSHUA-299' of github.com:lewismc/incubator-joshua

Posted by mj...@apache.org.
Merge branch 'JOSHUA-299' of github.com:lewismc/incubator-joshua


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1c76867a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1c76867a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1c76867a

Branch: refs/heads/master
Commit: 1c76867aa8a04763bd7c9ebd970fc27b4227c99c
Parents: aecc0b0 5dd80a3
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Sep 28 08:47:21 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Sep 28 08:47:21 2016 -0400

----------------------------------------------------------------------
 pom.xml                                         |  12 ++
 .../org/apache/joshua/decoder/Translation.java  |   1 -
 .../apache/joshua/decoder/TestTranslation.java  | 140 +++++++++++++++++++
 .../org/apache/joshua/decoder/package-info.java |  22 +++
 .../org/apache/joshua/packed/VocabTest.java     |  58 --------
 src/test/java/org/apache/joshua/packed/test.sh  |  20 ---
 .../decoder/moses-compat/output.expected        |   8 +-
 7 files changed, 176 insertions(+), 85 deletions(-)
----------------------------------------------------------------------