You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/15 16:02:51 UTC

[3/3] incubator-joshua git commit: added test of sparse features in the grammars

added test of sparse features in the grammars


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9c6ae40b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9c6ae40b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9c6ae40b

Branch: refs/heads/master
Commit: 9c6ae40ba1ed55cc54d9dcc99965572f4d122f0e
Parents: e5eb3a6
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Sep 15 18:02:30 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Sep 15 18:02:30 2016 +0200

----------------------------------------------------------------------
 .../joshua/decoder/cky/SparseFeatureTest.java   |  64 +++++++++++
 .../org/apache/joshua/decoder/cky/TestUtil.java | 115 +++++++++----------
 .../sparse-features/joshua-packed.config        |   4 +-
 .../grammar/sparse-features/joshua.config       |   4 +-
 .../grammar/sparse-features/output.gold         |   1 -
 .../grammar/sparse-features/test-packed.sh      |  32 ------
 .../resources/grammar/sparse-features/test.sh   |  32 ------
 7 files changed, 122 insertions(+), 130 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/java/org/apache/joshua/decoder/cky/SparseFeatureTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/SparseFeatureTest.java b/src/test/java/org/apache/joshua/decoder/cky/SparseFeatureTest.java
new file mode 100644
index 0000000..92d575c
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/cky/SparseFeatureTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.joshua.decoder.cky;
+
+import static org.apache.joshua.decoder.cky.TestUtil.translate;
+import static org.testng.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.util.io.KenLmTestUtil;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SparseFeatureTest {
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+  private String INPUT_STRING = "el chico";
+  private String EXPECTED_OUTPUT = "0 ||| the boy ||| tm_pt_0=1.000 tm_glue_0=1.000 sparse_test_feature=1.000 svd=1.000 the_boy=1.000 ||| 1.000\n";
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    if (decoder != null) {
+      decoder.cleanUp();
+      decoder = null;
+    }
+  }
+  
+  @DataProvider(name = "configurationFiles")
+  public Object[][] configFiles() {
+    return new Object[][]{{"src/test/resources/grammar/sparse-features/joshua.config"},
+      {"src/test/resources/grammar/sparse-features/joshua-packed.config"}};
+  }
+
+  @Test(dataProvider = "configurationFiles")
+  public void givenGrammar_whenDecoding_thenScoreAndTranslationCorrect(String configFile) throws Exception {
+    configureDecoder(configFile);
+
+    String decodedString = translate(INPUT_STRING, decoder, joshuaConfig);
+
+    assertEquals(decodedString, EXPECTED_OUTPUT);
+  }
+
+  public void configureDecoder(String pathToConfig) throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(pathToConfig);
+    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java b/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
index 35800c6..1ca22b2 100644
--- a/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
+++ b/src/test/java/org/apache/joshua/decoder/cky/TestUtil.java
@@ -1,20 +1,16 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
+ * agreements. See the NOTICE file distributed with this work for additional information regarding
+ * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License. You may obtain a
+ * copy of the License at
  *
- *  http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
  */
 package org.apache.joshua.decoder.cky;
 
@@ -32,56 +28,53 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 
 public class TestUtil {
 
-	public static final String N_BEST_SEPARATOR = "\n";
+  public static final String N_BEST_SEPARATOR = "\n";
 
-	/**
-	 * Loads a text file and returns a list containing one string per line
-	 * in the file.
-	 * @param pathToFile
-	 * @return
-	 * @throws IOException
-	 */
-	public static List<String> loadStringsFromFile(String pathToFile) throws IOException {
-		List<String> inputLines = Files.lines(Paths.get(pathToFile)).collect(Collectors.toList());
-		return inputLines;
-	}
+  /**
+   * Loads a text file and returns a list containing one string per line in the file.
+   * 
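+   * @param pathToFile path of the text file to read
+   * @return a list with one entry per line of the file
+   * @throws IOException if an I/O error occurs while opening the file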
+   */
+  public static List<String> loadStringsFromFile(String pathToFile) throws IOException {
+    List<String> inputLines = Files.lines(Paths.get(pathToFile)).collect(Collectors.toList());
+    return inputLines;
+  }
 
-	/**
-	 * 
-	 * @param inputStrings
-	 *            A list of strings that should be decoded,
-	 * @param decoder
-	 *            An initialized decoder,
-	 * @param joshuaConfig
-	 *            The JoshuaConfiguration corresponding to the decoder.
-	 * @return A list of decoded strings. If the decoder produces a n-best list
-	 *         (separated by N_BEST_SEPARATOR), then each translation of the
-	 *         n-best list has its own entry in the returned list.
-	 */
-	public static List<String> decodeList(List<String> inputStrings, Decoder decoder,
-			JoshuaConfiguration joshuaConfig) {
-		final List<String> decodedStrings = new ArrayList<>();
+  /**
+   * Decodes every input string and collects the resulting translations.
+   * @param inputStrings A list of strings that should be decoded,
+   * @param decoder An initialized decoder,
+   * @param joshuaConfig The JoshuaConfiguration corresponding to the decoder.
+   * @return A list of decoded strings. If the decoder produces an n-best list (separated by
+   *         N_BEST_SEPARATOR), then each translation of the n-best list has its own entry in the
+   *         returned list.
+   */
+  public static List<String> decodeList(List<String> inputStrings, Decoder decoder,
+      JoshuaConfiguration joshuaConfig) {
+    final List<String> decodedStrings = new ArrayList<>();
 
-		for (String inputString : inputStrings) {
-			final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
-			final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
-			decodedStrings.addAll(Arrays.asList(nBestList));
-		}
+    for (String inputString : inputStrings) {
+      final Sentence sentence = new Sentence(inputString, 0, joshuaConfig);
+      final String[] nBestList = decoder.decode(sentence).toString().split(N_BEST_SEPARATOR);
+      decodedStrings.addAll(Arrays.asList(nBestList));
+    }
 
-		return decodedStrings;
-	}
-	
-	/**
-	 * Translates the given input string and returns the translation
-	 * converted into a string.
-	 * @param input
-	 * @param decoder
-	 * @param joshuaConfig
-	 * @return
-	 */
-	public static String translate(String input, Decoder decoder, JoshuaConfiguration joshuaConfig) {
-	    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
-	    return decoder.decode(sentence).toString();
-	}
+    return decodedStrings;
+  }
+
+  /**
+   * Translates the given input string and returns the translation converted into a string.
+   * 
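+   * @param input the source text to translate
+   * @param decoder the decoder used to translate the input
+   * @param joshuaConfig the configuration passed to the {@code Sentence} built from the input
+   * @return the string form of the decoder's result for the input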
+   */
+  public static String translate(String input, Decoder decoder, JoshuaConfiguration joshuaConfig) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence).toString();
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/resources/grammar/sparse-features/joshua-packed.config
----------------------------------------------------------------------
diff --git a/src/test/resources/grammar/sparse-features/joshua-packed.config b/src/test/resources/grammar/sparse-features/joshua-packed.config
index 3b73c24..66a2405 100644
--- a/src/test/resources/grammar/sparse-features/joshua-packed.config
+++ b/src/test/resources/grammar/sparse-features/joshua-packed.config
@@ -1,5 +1,5 @@
-tm = thrax pt 20 grammar.packed
-tm = thrax glue -1 grammar.glue
+tm = thrax pt 20 src/test/resources/grammar/sparse-features/grammar.packed
+tm = thrax glue -1 src/test/resources/grammar/sparse-features/grammar.glue
 default-non-terminal = X
 goal-symbol = GOAL
 mark-oovs = false

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/resources/grammar/sparse-features/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/grammar/sparse-features/joshua.config b/src/test/resources/grammar/sparse-features/joshua.config
index 91d9c48..2cb5e01 100644
--- a/src/test/resources/grammar/sparse-features/joshua.config
+++ b/src/test/resources/grammar/sparse-features/joshua.config
@@ -1,5 +1,5 @@
-tm = thrax pt 20 grammar
-tm = thrax glue -1 grammar.glue
+tm = thrax pt 20 src/test/resources/grammar/sparse-features/grammar
+tm = thrax glue -1 src/test/resources/grammar/sparse-features/grammar.glue
 default-non-terminal = X
 goal-symbol = GOAL
 mark-oovs = false

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/resources/grammar/sparse-features/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/grammar/sparse-features/output.gold b/src/test/resources/grammar/sparse-features/output.gold
deleted file mode 100644
index 7e07c66..0000000
--- a/src/test/resources/grammar/sparse-features/output.gold
+++ /dev/null
@@ -1 +0,0 @@
-0 ||| the boy ||| tm_pt_0=1.000 tm_glue_0=1.000 sparse_test_feature=1.000 svd=1.000 the_boy=1.000 ||| 1.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/resources/grammar/sparse-features/test-packed.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/grammar/sparse-features/test-packed.sh b/src/test/resources/grammar/sparse-features/test-packed.sh
deleted file mode 100755
index cf3c460..0000000
--- a/src/test/resources/grammar/sparse-features/test-packed.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-echo el chico | $JOSHUA/bin/joshua -c joshua-packed.config -v 0 > output
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff output
-  exit 0
-else
-  exit 1
-fi
-
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9c6ae40b/src/test/resources/grammar/sparse-features/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/grammar/sparse-features/test.sh b/src/test/resources/grammar/sparse-features/test.sh
deleted file mode 100755
index 8560989..0000000
--- a/src/test/resources/grammar/sparse-features/test.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-echo el chico | $JOSHUA/bin/joshua -c joshua.config -v 0 > output 2> log
-
-# Compare
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
-  rm -f diff output log
-  exit 0
-else
-  exit 1
-fi
-
-