You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/02 13:27:48 UTC
[1/2] incubator-joshua git commit: Re-enabled JUnit tests with a
newer surefire plugin; fixed some of the tests;
KenLM tests have been temporarily disabled. Cleaned whitespace. Fixed a bug in
featurevector
Repository: incubator-joshua
Updated Branches:
refs/heads/master c4ce122a7 -> 35e9c58ff
Re-enabled JUnit tests with a newer surefire plugin; fixed some of the tests; KenLM tests have been temporarily disabled. Cleaned whitespace. Fixed a bug in featurevector
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1743cba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1743cba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1743cba
Branch: refs/heads/master
Commit: b1743cbad58523a6f33883fe971c20098fd5153c
Parents: c4ce122
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Thu Jun 2 14:46:31 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Jun 2 14:46:31 2016 +0200
----------------------------------------------------------------------
pom.xml | 12 ++-
.../apache/joshua/decoder/ff/FeatureVector.java | 91 +++++++++++---------
.../decoder/ff/lm/LanguageModelFFTest.java | 28 +++---
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 21 ++---
.../kbest_extraction/KBestExtractionTest.java | 14 +--
.../ConstrainedPhraseDecodingTest.java | 14 +--
.../phrase/decode/PhraseDecodingTest.java | 14 +--
.../org/apache/joshua/system/KenLmTest.java | 2 +
.../joshua/system/StructuredOutputTest.java | 4 +-
.../system/StructuredTranslationTest.java | 52 +++++------
10 files changed, 136 insertions(+), 116 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 725fd0d..40a6940 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,8 +28,8 @@
<packaging>jar</packaging>
<version>6.0.6-SNAPSHOT</version>
<name>Apache Joshua Machine Translation Toolkit</name>
- <description>Joshua is an open-source statistical machine
- translation decoder for phrase-based, hierarchical,
+ <description>Joshua is an open-source statistical machine
+ translation decoder for phrase-based, hierarchical,
and syntax-based machine translation, written in Java.
</description>
<url>http://joshua.incubator.apache.org</url>
@@ -141,6 +141,10 @@
</descriptorRefs>
</configuration>
</plugin>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.19.1</version>
+ </plugin>
</plugins>
</build>
<dependencies>
@@ -203,12 +207,12 @@
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
</dependency>
-
+
<!-- Test Dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.10</version>
+ <version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
index d722de5..1b39c78 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -28,17 +28,17 @@ import java.util.Set;
/**
* An implementation of a sparse feature vector, using for representing both weights and feature
* values.
- *
+ *
* This class is used to hold both the decoder weights and the feature values accumulated across
* each edge. When features are read in upon decoder startup, they all start out as sparse features
* and are stored in the hash table. After the feature functions have been loaded, the decoder
* queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
* returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
* feature array. Therefore, when a feature registers a dense feature, it should take care to
- * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
- * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
- * values later on.
- *
+ * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
+ * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
+ * values later on.
+ *
* @author Matt Post post@cs.jhu.edu
*/
@@ -68,18 +68,18 @@ public class FeatureVector {
/**
* This version of the constructor takes an uninitialized feature with potentially intermingled
* labeled and unlabeled feature values, of the format:
- *
+ *
* [feature1=]value [feature2=]value
- *
+ *
* It produces a Feature Vector where all unlabeled features have been labeled by appending the
* unlabeled feature index (starting at 0) to the defaultPrefix value.
- *
+ *
* **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
- * of confusion. They have to be inverted here and when the score is actually computed. They
+ * of confusion. They have to be inverted here and when the score is actually computed. They
* are inverted here (which is used to build the feature vector representation of a rule's dense
* features) and in {@link org.apache.joshua.decoder.ff.tm.Rule#estimateRuleCost(java.util.List)}
* , where the rule's precomputable (weighted) score is cached.
- *
+ *
* @param featureString, the string of labeled and unlabeled features (probably straight from the
* grammar text file)
* @param prefix, the prefix to use for unlabeled features (probably "tm_OWNER_")
@@ -87,19 +87,19 @@ public class FeatureVector {
public FeatureVector(String featureString, String prefix) {
// System.err.println(String.format("FEATURES_OF(%s, %s)", featureString, prefix));
-
+
/*
* Read through the features on this rule, adding them to the feature vector. Unlabeled features
* are converted to a canonical form.
- *
+ *
* Note that it's bad form to mix unlabeled features and the named feature index they are mapped
* to, but we are being liberal in what we accept.
- *
+ *
* IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
* This is the source of *no end* of confusion and should be done away with.
*/
this();
-
+
int denseFeatureIndex = 0;
if (!featureString.trim().equals("")) {
@@ -133,13 +133,13 @@ public class FeatureVector {
}
}
}
-
+
/**
* Register one or more dense features with the global weight vector. This assumes them global
* IDs, and then returns the index of the first feature (from which the calling feature function
* can infer them all). This *must* be called by every feature function wishing to register
* dense features!
- *
+ *
* @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
*/
public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
@@ -152,11 +152,11 @@ public class FeatureVector {
}
}
}
-
+
public ArrayList<Float> getDenseFeatures() {
return denseFeatures;
}
-
+
public HashMap<String,Float> getSparseFeatures() {
return sparseFeatures;
}
@@ -182,13 +182,13 @@ public class FeatureVector {
* Subtracts the weights in the other feature vector from this one. Note that this is not set
* subtraction; keys found in the other FeatureVector but not in this one will be initialized with
* a value of 0.0f before subtraction.
- *
+ *
* @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
*/
public void subtract(FeatureVector other) {
for (int i = 0; i < denseFeatures.size(); i++)
denseFeatures.set(i, getDense(i) - other.getDense(i));
-
+
for (String key : other.keySet()) {
float oldValue = (sparseFeatures.containsKey(key)) ? sparseFeatures.get(key) : 0.0f;
sparseFeatures.put(key, oldValue - other.getSparse(key));
@@ -198,16 +198,16 @@ public class FeatureVector {
/**
* Adds the weights in the other feature vector to this one. This is set union, with values shared
* between the two being summed.
- *
+ *
* @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
*/
public void add(FeatureVector other) {
while (denseFeatures.size() < other.denseFeatures.size())
denseFeatures.add(0.0f);
-
+
for (int i = 0; i < other.denseFeatures.size(); i++)
increment(i, other.getDense(i));
-
+
for (String key : other.keySet()) {
if (!sparseFeatures.containsKey(key))
sparseFeatures.put(key, other.getSparse(key));
@@ -215,10 +215,10 @@ public class FeatureVector {
sparseFeatures.put(key, sparseFeatures.get(key) + other.getSparse(key));
}
}
-
+
/**
* Return the weight of a feature by name, after checking to determine if it is sparse or dense.
- *
+ *
* @param feature String name of some feature
* @return the feature's weight
*/
@@ -233,7 +233,7 @@ public class FeatureVector {
/**
* Return the weight of a sparse feature, indexed by its name.
- *
+ *
* @param feature String name of some feature
* @return the sparse feature's weight, or 0 if not found.
*/
@@ -242,15 +242,15 @@ public class FeatureVector {
return sparseFeatures.get(feature);
return 0.0f;
}
-
+
public boolean hasValue(String name) {
return sparseFeatures.containsKey(name);
}
-
+
/**
* Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
* is not found. In other words, this is a safe way to query the dense feature vector.
- *
+ *
* @param id int representing of some dense feature
* @return the dense feature's value, or 0 if not found.
*/
@@ -263,7 +263,7 @@ public class FeatureVector {
public void increment(String feature, float value) {
sparseFeatures.put(feature, getSparse(feature) + value);
}
-
+
public void increment(int id, float value) {
while (id >= denseFeatures.size())
denseFeatures.add(0.0f);
@@ -273,7 +273,7 @@ public class FeatureVector {
/**
* Set the value of a feature. We need to first determine whether the feature is a dense or
* sparse one, then set accordingly.
- *
+ *
* @param feature String name of some feature
* @param value float value to set to the featue with the associated name
*/
@@ -287,7 +287,7 @@ public class FeatureVector {
// No dense feature was found; assume it's sparse
sparseFeatures.put(feature, value);
}
-
+
public void set(int id, float value) {
while (id >= denseFeatures.size())
denseFeatures.add(0.0f);
@@ -295,12 +295,17 @@ public class FeatureVector {
}
public Map<String, Float> getMap() {
- return sparseFeatures;
+ Map<String, Float> allFeatures = new HashMap<>(sparseFeatures.size() + denseFeatures.size());
+ allFeatures.putAll(sparseFeatures);
+ for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+ allFeatures.put(DENSE_FEATURE_NAMES.get(i), getDense(i));
+ }
+ return allFeatures;
}
/**
* Computes the inner product between this feature vector and another one.
- *
+ *
* @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
* @return float value representing the computation
*/
@@ -308,7 +313,7 @@ public class FeatureVector {
float cost = 0.0f;
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
cost += getDense(i) * other.getDense(i);
-
+
for (String key : sparseFeatures.keySet())
cost += sparseFeatures.get(key) * other.getSparse(key);
@@ -323,20 +328,20 @@ public class FeatureVector {
/***
* Moses distinguishes sparse features as those containing an underscore, so we have to fake it
* to be compatible with their tuners.
- *
+ *
* @return trimmed Moses output string
*/
public String mosesString() {
StringBuilder outputString = new StringBuilder();
-
+
HashSet<String> printed_keys = new HashSet<String>();
-
+
// First print all the dense feature names in order
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), getDense(i)));
printed_keys.add(DENSE_FEATURE_NAMES.get(i));
}
-
+
// Now print the sparse features
ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
Collections.sort(keys);
@@ -351,7 +356,7 @@ public class FeatureVector {
}
return outputString.toString().trim();
}
-
+
/***
* Outputs a list of feature names. All dense features are printed. Feature names are printed
* in the order they were read in.
@@ -359,15 +364,15 @@ public class FeatureVector {
@Override
public String toString() {
StringBuilder outputString = new StringBuilder();
-
+
HashSet<String> printed_keys = new HashSet<String>();
-
+
// First print all the dense feature names in order
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
printed_keys.add(DENSE_FEATURE_NAMES.get(i));
}
-
+
// Now print the rest of the features
ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
Collections.sort(keys);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
index f762e31..d541fdc 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -36,59 +36,59 @@ public class LanguageModelFFTest {
private static final float WEIGHT = 0.5f;
private LanguageModelFF ff;
-
+
@Before
public void setUp() {
Decoder.resetGlobalState();
-
+
FeatureVector weights = new FeatureVector();
weights.set("lm_0", WEIGHT);
- String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./joshua/test/lm/berkeley/lm"};
-
+ String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
+
JoshuaConfiguration config = new JoshuaConfiguration();
ff = new LanguageModelFF(weights, args, config);
}
-
+
@After
public void tearDown() {
Decoder.resetGlobalState();
}
-
+
@Test
public void givenNonStartSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
int[] left = {3};
NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-
+
float score = ff.languageModel.sentenceLogProbability(left, 2, 1);
assertEquals(-99.0f, score, 0.0);
-
+
float cost = ff.estimateFutureCost(null, currentState, null);
assertEquals(score * WEIGHT, cost, 0.0);
}
-
+
@Test
public void givenOnlyStartSymbol_whenEstimateFutureCost_thenZeroResult() {
int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
int[] left = {startSymbolId};
NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-
+
float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
assertEquals(0.0f, score, 0.0);
-
+
float cost = ff.estimateFutureCost(null, currentState, null);
assertEquals(score * WEIGHT, cost, 0.0);
}
-
+
@Test
public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
assertThat(startSymbolId, not(equalTo(3)));
int[] left = {startSymbolId, 3};
NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-
+
float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
assertEquals(-100.752754f, score, 0.0f);
-
+
float cost = ff.estimateFutureCost(null, currentState, null);
assertEquals(score * WEIGHT, cost, 0.0f);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 00a6a36..e5b2d69 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -27,6 +27,7 @@ import org.junit.After;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.apache.joshua.decoder.Decoder;
@@ -42,26 +43,26 @@ public class LMGrammarBerkeleyTest {
private static final String INPUT = "the chat-rooms";
private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
-
+
private JoshuaConfiguration joshuaConfig;
private Decoder decoder;
-
+
@Parameters
public static List<String> lmFiles() {
- return Arrays.asList("resources/berkeley_lm/lm",
- "resources/berkeley_lm/lm.gz",
- "resources/berkeley_lm/lm.berkeleylm",
+ return Arrays.asList("resources/berkeley_lm/lm",
+ "resources/berkeley_lm/lm.gz",
+ "resources/berkeley_lm/lm.berkeleylm",
"resources/berkeley_lm/lm.berkeleylm.gz");
}
-
+
@After
public void tearDown() throws Exception {
decoder.cleanUp();
}
-
- //TODO @Parameters
+
+ @Parameter
public String lmFile;
-
+
@Test
public void verifyLM() {
joshuaConfig = new JoshuaConfiguration();
@@ -71,7 +72,7 @@ public class LMGrammarBerkeleyTest {
String translation = decode(INPUT).toString();
assertEquals(lmFile, "tm_glue_0=2.000 lm_0=-7.153\n", translation);
}
-
+
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index caeeeb3..44ef35d 100644
--- a/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -33,6 +33,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
/**
@@ -40,15 +41,16 @@ import org.junit.Test;
* TODO (fhieber): this test strangely only works with StateMinimizing KenLM.
* This is to be investigated
*/
+@Ignore("re-enable as soon as kenlm native library support will be in place")
public class KBestExtractionTest {
-
+
private static final String CONFIG = "resources/kbest_extraction/joshua.config";
private static final String INPUT = "a b c d e";
private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
-
+
private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
-
+
@Before
public void setUp() throws Exception {
joshuaConfig = new JoshuaConfiguration();
@@ -56,20 +58,20 @@ public class KBestExtractionTest {
joshuaConfig.outputFormat = "%i ||| %s ||| %c";
decoder = new Decoder(joshuaConfig, "");
}
-
+
@After
public void tearDown() throws Exception {
decoder.cleanUp();
decoder = null;
}
-
+
@Test
public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
final String translation = decode(INPUT).toString();
final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
assertEquals(gold, translation);
}
-
+
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
index 04078c6..a99338a 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -33,40 +33,42 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
/**
* Reimplements the constrained phrase decoding test
*/
+@Ignore("re-enable as soon as kenlm native library support will be in place")
public class ConstrainedPhraseDecodingTest {
-
+
private static final String CONFIG = "resources/phrase_decoder/constrained.config";
private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
-
+
private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
-
+
@Before
public void setUp() throws Exception {
joshuaConfig = new JoshuaConfiguration();
joshuaConfig.readConfigFile(CONFIG);
decoder = new Decoder(joshuaConfig, "");
}
-
+
@After
public void tearDown() throws Exception {
decoder.cleanUp();
decoder = null;
}
-
+
@Test
public void givenInput_whenConstrainedPhraseDecoding_thenOutputIsAsExpected() throws IOException {
final String translation = decode(INPUT).toString();
final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
assertEquals(gold, translation);
}
-
+
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 794ecd5..b5bd612 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -33,40 +33,42 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
/**
* Reimplements the constrained phrase decoding test
*/
+@Ignore("re-enable as soon as kenlm native library support will be in place")
public class PhraseDecodingTest {
-
+
private static final String CONFIG = "resources/phrase_decoder/config";
private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
-
+
private JoshuaConfiguration joshuaConfig = null;
private Decoder decoder = null;
-
+
@Before
public void setUp() throws Exception {
joshuaConfig = new JoshuaConfiguration();
joshuaConfig.readConfigFile(CONFIG);
decoder = new Decoder(joshuaConfig, "");
}
-
+
@After
public void tearDown() throws Exception {
decoder.cleanUp();
decoder = null;
}
-
+
@Test
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
final String translation = decode(INPUT).toString();
final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
assertEquals(gold, translation);
}
-
+
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index d61e303..1f032d8 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -26,6 +26,7 @@ import org.apache.joshua.decoder.ff.lm.KenLM;
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
/**
@@ -34,6 +35,7 @@ import org.junit.Test;
* If run in Eclipse, add -Djava.library.path=build/lib to JVM arguments
* of the run configuration.
*/
+@Ignore("re-enable as soon as kenlm native library support will be in place")
public class KenLmTest {
private static final String LANGUAGE_MODEL_PATH = "resources/kenlm/oilers.kenlm";
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/StructuredOutputTest.java b/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
index b8a2496..f5e9d34 100644
--- a/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@ -36,7 +36,7 @@ import org.junit.Assert;
* a bunch of capital letters to lowercase letters. Rules in the test grammar
* drop and generate additional words and simulate reordering of rules, so that
* proper extraction of word alignments can be tested.
- *
+ *
* @author fhieber
*/
public class StructuredOutputTest {
@@ -67,7 +67,7 @@ public class StructuredOutputTest {
joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
joshuaConfig.goal_symbol = "[GOAL]";
joshuaConfig.default_non_terminal = "[X]";
- joshuaConfig.features.add("feature_function = OOVPenalty");
+ joshuaConfig.features.add("OOVPenalty");
joshuaConfig.weights.add("tm_pt_0 1");
joshuaConfig.weights.add("tm_pt_1 1");
joshuaConfig.weights.add("tm_pt_2 1");
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java b/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
index 69412e2..6718858 100644
--- a/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
@@ -42,7 +42,7 @@ import org.junit.Test;
* drop and generate additional words and simulate reordering of rules, so that
* proper extraction of word alignments and other information from the decoder
* can be tested.
- *
+ *
* @author fhieber
*/
public class StructuredTranslationTest {
@@ -70,6 +70,7 @@ public class StructuredTranslationTest {
EXPECTED_FEATURES.put("tm_pt_4", -3.0f);
EXPECTED_FEATURES.put("tm_pt_5", -3.0f);
EXPECTED_FEATURES.put("OOV", 7.0f);
+ EXPECTED_FEATURES.put("OOVPenalty", 0.0f);
}
@Before
@@ -109,30 +110,30 @@ public class StructuredTranslationTest {
Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
}
-
+
@Test
public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
// GIVEN
joshuaConfig.use_structured_output = false;
joshuaConfig.outputFormat = "%s | %a ";
-
+
// WHEN
final String translation = decode(INPUT).toString().trim();
-
+
// THEN
assertEquals(EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING, translation);
}
-
+
@Test
public void givenInput_whenRegularOutputFormatWithTopN1_thenExpectedOutput() {
// GIVEN
joshuaConfig.use_structured_output = false;
joshuaConfig.outputFormat = "%s | %e | %a | %c";
joshuaConfig.topN = 1;
-
+
// WHEN
final String translation = decode(INPUT).toString().trim();
-
+
// THEN
assertEquals(EXPECTED_TRANSLATION + " | " + INPUT + " | " + EXPECTED_WORD_ALIGNMENT_STRING + String.format(" | %.3f", EXPECTED_SCORE),
translation);
@@ -143,7 +144,7 @@ public class StructuredTranslationTest {
// GIVEN
joshuaConfig.use_structured_output = true;
joshuaConfig.topN = 0;
-
+
// WHEN
final Translation translation = decode(INPUT);
final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -152,7 +153,7 @@ public class StructuredTranslationTest {
final float translationScore = structuredTranslation.getTranslationScore();
final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
-
+
// THEN
assertTrue(translation.getStructuredTranslations().size() == 1);
assertEquals(EXPECTED_TRANSLATION, translationString);
@@ -162,13 +163,13 @@ public class StructuredTranslationTest {
assertEquals(wordAlignment.size(), translatedTokens.size());
assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
}
-
+
@Test
public void givenInput_whenStructuredOutputFormatWithTopN1_thenExpectedOutput() {
// GIVEN
joshuaConfig.use_structured_output = true;
joshuaConfig.topN = 1;
-
+
// WHEN
final Translation translation = decode(INPUT);
final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
@@ -178,7 +179,7 @@ public class StructuredTranslationTest {
final float translationScore = structuredTranslation.getTranslationScore();
final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
-
+
// THEN
assertTrue(structuredTranslations.size() == 1);
assertEquals(EXPECTED_TRANSLATION, translationString);
@@ -188,19 +189,19 @@ public class StructuredTranslationTest {
assertEquals(wordAlignment.size(), translatedTokens.size());
assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
}
-
+
@Test
public void givenInput_whenStructuredOutputFormatWithKBest_thenExpectedOutput() {
// GIVEN
joshuaConfig.use_structured_output = true;
joshuaConfig.topN = 100;
-
+
// WHEN
final Translation translation = decode(INPUT);
final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
final StructuredTranslation viterbiTranslation = structuredTranslations.get(0);
final StructuredTranslation lastKBest = structuredTranslations.get(structuredTranslations.size() - 1);
-
+
// THEN
assertEquals(structuredTranslations.size(), EXPECTED_NBEST_LIST_SIZE);
assertTrue(structuredTranslations.size() > 1);
@@ -212,14 +213,14 @@ public class StructuredTranslationTest {
// last entry in KBEST is all input words untranslated, should have 8 OOVs.
assertEquals(INPUT, lastKBest.getTranslationString());
assertEquals(-800.0, lastKBest.getTranslationFeatures().get("OOVPenalty"), 0.0001);
-
+
}
-
+
@Test
public void givenEmptyInput_whenStructuredOutputFormat_thenEmptyOutput() {
// GIVEN
joshuaConfig.use_structured_output = true;
-
+
// WHEN
final Translation translation = decode("");
final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -227,20 +228,20 @@ public class StructuredTranslationTest {
final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
final float translationScore = structuredTranslation.getTranslationScore();
final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
-
+
// THEN
assertEquals("", translationString);
assertTrue(translatedTokens.isEmpty());
assertEquals(0, translationScore, 0.00001);
assertTrue(wordAlignment.isEmpty());
}
-
+
@Test
public void givenOOVInput_whenStructuredOutputFormat_thenOOVOutput() {
// GIVEN
joshuaConfig.use_structured_output = true;
final String input = "gabarbl";
-
+
// WHEN
final Translation translation = decode(input);
final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -248,23 +249,24 @@ public class StructuredTranslationTest {
final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
final float translationScore = structuredTranslation.getTranslationScore();
final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
-
+
// THEN
assertEquals(input, translationString);
assertTrue(translatedTokens.contains(input));
assertEquals(-99.0, translationScore, 0.00001);
assertTrue(wordAlignment.contains(asList(0)));
}
-
+
@Test
public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
// GIVEN
joshuaConfig.use_structured_output = false;
-
+ joshuaConfig.outputFormat = "%s";
+
// WHEN
final Translation translation = decode("");
final String translationString = translation.toString();
-
+
// THEN
assertEquals("\n", translationString);
}
[2/2] incubator-joshua git commit: Merge branch 'junit'
Posted by mj...@apache.org.
Merge branch 'junit'
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/35e9c58f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/35e9c58f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/35e9c58f
Branch: refs/heads/master
Commit: 35e9c58ffead1b71de8cde5f6a3b74804e58b118
Parents: c4ce122 b1743cb
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Jun 2 09:27:42 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Jun 2 09:27:42 2016 -0400
----------------------------------------------------------------------
pom.xml | 12 ++-
.../apache/joshua/decoder/ff/FeatureVector.java | 91 +++++++++++---------
.../decoder/ff/lm/LanguageModelFFTest.java | 28 +++---
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 21 ++---
.../kbest_extraction/KBestExtractionTest.java | 14 +--
.../ConstrainedPhraseDecodingTest.java | 14 +--
.../phrase/decode/PhraseDecodingTest.java | 14 +--
.../org/apache/joshua/system/KenLmTest.java | 2 +
.../joshua/system/StructuredOutputTest.java | 4 +-
.../system/StructuredTranslationTest.java | 52 +++++------
10 files changed, 136 insertions(+), 116 deletions(-)
----------------------------------------------------------------------