You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/02 13:27:48 UTC

[1/2] incubator-joshua git commit: Re-enabled JUnit tests with a newer surefire plugin; fixed some of the tests; KenLM tests have been temporarily disabled. Cleaned whitespace. Fixed a bug in featurevector

Repository: incubator-joshua
Updated Branches:
  refs/heads/master c4ce122a7 -> 35e9c58ff


Re-enabled JUnit tests with a newer surefire plugin; fixed some of the tests; KenLM tests have been temporarily disabled. Cleaned whitespace.  Fixed a bug in featurevector


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1743cba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1743cba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1743cba

Branch: refs/heads/master
Commit: b1743cbad58523a6f33883fe971c20098fd5153c
Parents: c4ce122
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Thu Jun 2 14:46:31 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Jun 2 14:46:31 2016 +0200

----------------------------------------------------------------------
 pom.xml                                         | 12 ++-
 .../apache/joshua/decoder/ff/FeatureVector.java | 91 +++++++++++---------
 .../decoder/ff/lm/LanguageModelFFTest.java      | 28 +++---
 .../lm/berkeley_lm/LMGrammarBerkeleyTest.java   | 21 ++---
 .../kbest_extraction/KBestExtractionTest.java   | 14 +--
 .../ConstrainedPhraseDecodingTest.java          | 14 +--
 .../phrase/decode/PhraseDecodingTest.java       | 14 +--
 .../org/apache/joshua/system/KenLmTest.java     |  2 +
 .../joshua/system/StructuredOutputTest.java     |  4 +-
 .../system/StructuredTranslationTest.java       | 52 +++++------
 10 files changed, 136 insertions(+), 116 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 725fd0d..40a6940 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,8 +28,8 @@
   <packaging>jar</packaging>
   <version>6.0.6-SNAPSHOT</version>
   <name>Apache Joshua Machine Translation Toolkit</name>
-  <description>Joshua is an open-source statistical machine 
-  translation decoder for phrase-based, hierarchical, 
+  <description>Joshua is an open-source statistical machine
+  translation decoder for phrase-based, hierarchical,
   and syntax-based machine translation, written in Java.
   </description>
   <url>http://joshua.incubator.apache.org</url>
@@ -141,6 +141,10 @@
           </descriptorRefs>
         </configuration>
       </plugin>
+      <plugin>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.19.1</version>
+      </plugin>
     </plugins>
   </build>
   <dependencies>
@@ -203,12 +207,12 @@
       <artifactId>slf4j-log4j12</artifactId>
       <version>${slf4j.version}</version>
     </dependency>
-    
+
     <!-- Test Dependencies -->
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
-      <version>4.10</version>
+      <version>4.12</version>
       <scope>test</scope>
     </dependency>
     <dependency>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
index d722de5..1b39c78 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -28,17 +28,17 @@ import java.util.Set;
 /**
  * An implementation of a sparse feature vector, using for representing both weights and feature
  * values.
- * 
+ *
  * This class is used to hold both the decoder weights and the feature values accumulated across
  * each edge. When features are read in upon decoder startup, they all start out as sparse features
  * and are stored in the hash table. After the feature functions have been loaded, the decoder
  * queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
  * returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
  * feature array. Therefore, when a feature registers a dense feature, it should take care to
- * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or 
- * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature 
- * values later on. 
- * 
+ * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
+ * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
+ * values later on.
+ *
  * @author Matt Post post@cs.jhu.edu
  */
 
@@ -68,18 +68,18 @@ public class FeatureVector {
   /**
    * This version of the constructor takes an uninitialized feature with potentially intermingled
    * labeled and unlabeled feature values, of the format:
-   * 
+   *
    * [feature1=]value [feature2=]value
-   * 
+   *
    * It produces a Feature Vector where all unlabeled features have been labeled by appending the
    * unlabeled feature index (starting at 0) to the defaultPrefix value.
-   * 
+   *
    * **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
-   * of confusion. They have to be inverted here and when the score is actually computed. They 
+   * of confusion. They have to be inverted here and when the score is actually computed. They
    * are inverted here (which is used to build the feature vector representation of a rule's dense
    * features) and in {@link org.apache.joshua.decoder.ff.tm.Rule#estimateRuleCost(java.util.List)}
    * , where the rule's precomputable (weighted) score is cached.
-   * 
+   *
    * @param featureString, the string of labeled and unlabeled features (probably straight from the
    *          grammar text file)
    * @param prefix, the prefix to use for unlabeled features (probably "tm_OWNER_")
@@ -87,19 +87,19 @@ public class FeatureVector {
   public FeatureVector(String featureString, String prefix) {
 
 //    System.err.println(String.format("FEATURES_OF(%s, %s)", featureString, prefix));
-    
+
     /*
      * Read through the features on this rule, adding them to the feature vector. Unlabeled features
      * are converted to a canonical form.
-     * 
+     *
      * Note that it's bad form to mix unlabeled features and the named feature index they are mapped
      * to, but we are being liberal in what we accept.
-     * 
+     *
      * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
      * This is the source of *no end* of confusion and should be done away with.
      */
     this();
-    
+
     int denseFeatureIndex = 0;
 
     if (!featureString.trim().equals("")) {
@@ -133,13 +133,13 @@ public class FeatureVector {
       }
     }
   }
-  
+
   /**
    * Register one or more dense features with the global weight vector. This assumes them global
    * IDs, and then returns the index of the first feature (from which the calling feature function
    * can infer them all). This *must* be called by every feature function wishing to register
    * dense features!
-   * 
+   *
    * @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
    */
   public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
@@ -152,11 +152,11 @@ public class FeatureVector {
       }
     }
   }
-  
+
   public ArrayList<Float> getDenseFeatures() {
     return denseFeatures;
   }
-  
+
   public HashMap<String,Float> getSparseFeatures() {
     return sparseFeatures;
   }
@@ -182,13 +182,13 @@ public class FeatureVector {
    * Subtracts the weights in the other feature vector from this one. Note that this is not set
    * subtraction; keys found in the other FeatureVector but not in this one will be initialized with
    * a value of 0.0f before subtraction.
-   * 
+   *
    * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
    */
   public void subtract(FeatureVector other) {
     for (int i = 0; i < denseFeatures.size(); i++)
       denseFeatures.set(i, getDense(i) - other.getDense(i));
-    
+
     for (String key : other.keySet()) {
       float oldValue = (sparseFeatures.containsKey(key)) ? sparseFeatures.get(key) : 0.0f;
       sparseFeatures.put(key, oldValue - other.getSparse(key));
@@ -198,16 +198,16 @@ public class FeatureVector {
   /**
    * Adds the weights in the other feature vector to this one. This is set union, with values shared
    * between the two being summed.
-   * 
+   *
    * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
    */
   public void add(FeatureVector other) {
     while (denseFeatures.size() < other.denseFeatures.size())
       denseFeatures.add(0.0f);
-    
+
     for (int i = 0; i < other.denseFeatures.size(); i++)
       increment(i, other.getDense(i));
-    
+
     for (String key : other.keySet()) {
       if (!sparseFeatures.containsKey(key))
         sparseFeatures.put(key, other.getSparse(key));
@@ -215,10 +215,10 @@ public class FeatureVector {
         sparseFeatures.put(key, sparseFeatures.get(key) + other.getSparse(key));
     }
   }
-  
+
   /**
    * Return the weight of a feature by name, after checking to determine if it is sparse or dense.
-   * 
+   *
    * @param feature String name of some feature
    * @return the feature's weight
    */
@@ -233,7 +233,7 @@ public class FeatureVector {
 
   /**
    * Return the weight of a sparse feature, indexed by its name.
-   * 
+   *
    * @param feature String name of some feature
    * @return the sparse feature's weight, or 0 if not found.
    */
@@ -242,15 +242,15 @@ public class FeatureVector {
       return sparseFeatures.get(feature);
     return 0.0f;
   }
-  
+
   public boolean hasValue(String name) {
     return sparseFeatures.containsKey(name);
   }
-  
+
   /**
    * Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
    * is not found. In other words, this is a safe way to query the dense feature vector.
-   * 
+   *
    * @param id int representing of some dense feature
    * @return the dense feature's value, or 0 if not found.
    */
@@ -263,7 +263,7 @@ public class FeatureVector {
   public void increment(String feature, float value) {
     sparseFeatures.put(feature, getSparse(feature) + value);
   }
-  
+
   public void increment(int id, float value) {
     while (id >= denseFeatures.size())
       denseFeatures.add(0.0f);
@@ -273,7 +273,7 @@ public class FeatureVector {
   /**
    * Set the value of a feature. We need to first determine whether the feature is a dense or
    * sparse one, then set accordingly.
-   * 
+   *
    * @param feature String name of some feature
    * @param value float value to set to the featue with the associated name
    */
@@ -287,7 +287,7 @@ public class FeatureVector {
     // No dense feature was found; assume it's sparse
     sparseFeatures.put(feature, value);
   }
-  
+
   public void set(int id, float value) {
     while (id >= denseFeatures.size())
       denseFeatures.add(0.0f);
@@ -295,12 +295,17 @@ public class FeatureVector {
   }
 
   public Map<String, Float> getMap() {
-    return sparseFeatures;
+    Map<String, Float> allFeatures = new HashMap<>(sparseFeatures.size() + denseFeatures.size());
+    allFeatures.putAll(sparseFeatures);
+    for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
+      allFeatures.put(DENSE_FEATURE_NAMES.get(i), getDense(i));
+    }
+    return allFeatures;
   }
 
   /**
    * Computes the inner product between this feature vector and another one.
-   * 
+   *
    * @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
    * @return float value representing the computation
    */
@@ -308,7 +313,7 @@ public class FeatureVector {
     float cost = 0.0f;
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
       cost += getDense(i) * other.getDense(i);
-    
+
     for (String key : sparseFeatures.keySet())
       cost += sparseFeatures.get(key) * other.getSparse(key);
 
@@ -323,20 +328,20 @@ public class FeatureVector {
   /***
    * Moses distinguishes sparse features as those containing an underscore, so we have to fake it
    * to be compatible with their tuners.
-   * 
+   *
    * @return trimmed Moses output string
    */
   public String mosesString() {
     StringBuilder outputString = new StringBuilder();
-    
+
     HashSet<String> printed_keys = new HashSet<String>();
-    
+
     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replaceAll("_", "-"), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }
-    
+
     // Now print the sparse features
     ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
     Collections.sort(keys);
@@ -351,7 +356,7 @@ public class FeatureVector {
     }
     return outputString.toString().trim();
   }
-    
+
   /***
    * Outputs a list of feature names. All dense features are printed. Feature names are printed
    * in the order they were read in.
@@ -359,15 +364,15 @@ public class FeatureVector {
   @Override
   public String toString() {
     StringBuilder outputString = new StringBuilder();
-    
+
     HashSet<String> printed_keys = new HashSet<String>();
-    
+
     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }
-    
+
     // Now print the rest of the features
     ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
     Collections.sort(keys);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
index f762e31..d541fdc 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/LanguageModelFFTest.java
@@ -36,59 +36,59 @@ public class LanguageModelFFTest {
   private static final float WEIGHT = 0.5f;
 
   private LanguageModelFF ff;
-  
+
   @Before
   public void setUp() {
     Decoder.resetGlobalState();
-    
+
     FeatureVector weights = new FeatureVector();
     weights.set("lm_0", WEIGHT);
-    String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./joshua/test/lm/berkeley/lm"};
-    
+    String[] args = {"-lm_type", "berkeleylm", "-lm_order", "2", "-lm_file", "./src/test/resources/lm/berkeley/lm"};
+
     JoshuaConfiguration config = new JoshuaConfiguration();
     ff = new LanguageModelFF(weights, args, config);
   }
-  
+
   @After
   public void tearDown() {
     Decoder.resetGlobalState();
   }
-  
+
   @Test
   public void givenNonStartSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
     int[] left = {3};
     NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-    
+
     float score = ff.languageModel.sentenceLogProbability(left, 2, 1);
     assertEquals(-99.0f, score, 0.0);
-    
+
     float cost = ff.estimateFutureCost(null, currentState, null);
     assertEquals(score * WEIGHT, cost, 0.0);
   }
-  
+
   @Test
   public void givenOnlyStartSymbol_whenEstimateFutureCost_thenZeroResult() {
     int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
     int[] left = {startSymbolId};
     NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-    
+
     float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
     assertEquals(0.0f, score, 0.0);
-    
+
     float cost = ff.estimateFutureCost(null, currentState, null);
     assertEquals(score * WEIGHT, cost, 0.0);
   }
-  
+
   @Test
   public void givenStartAndOneMoreSymbol_whenEstimateFutureCost_thenMultipleWeightAndLogProbabilty() {
     int startSymbolId = Vocabulary.id(Vocabulary.START_SYM);
     assertThat(startSymbolId, not(equalTo(3)));
     int[] left = {startSymbolId, 3};
     NgramDPState currentState = new NgramDPState(left, new int[left.length]);
-    
+
     float score = ff.languageModel.sentenceLogProbability(left, 2, 2);
     assertEquals(-100.752754f, score, 0.0f);
-    
+
     float cost = ff.estimateFutureCost(null, currentState, null);
     assertEquals(score * WEIGHT, cost, 0.0f);
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 00a6a36..e5b2d69 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -27,6 +27,7 @@ import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
 import org.junit.runners.Parameterized.Parameters;
 
 import org.apache.joshua.decoder.Decoder;
@@ -42,26 +43,26 @@ public class LMGrammarBerkeleyTest {
 
   private static final String INPUT = "the chat-rooms";
   private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
-  
+
   private JoshuaConfiguration joshuaConfig;
   private Decoder decoder;
-  
+
   @Parameters
   public static List<String> lmFiles() {
-    return Arrays.asList("resources/berkeley_lm/lm", 
-        "resources/berkeley_lm/lm.gz", 
-        "resources/berkeley_lm/lm.berkeleylm", 
+    return Arrays.asList("resources/berkeley_lm/lm",
+        "resources/berkeley_lm/lm.gz",
+        "resources/berkeley_lm/lm.berkeleylm",
         "resources/berkeley_lm/lm.berkeleylm.gz");
   }
-  
+
   @After
   public void tearDown() throws Exception {
     decoder.cleanUp();
   }
-  
-  //TODO @Parameters
+
+  @Parameter
   public String lmFile;
-  
+
   @Test
   public void verifyLM() {
     joshuaConfig = new JoshuaConfiguration();
@@ -71,7 +72,7 @@ public class LMGrammarBerkeleyTest {
     String translation = decode(INPUT).toString();
     assertEquals(lmFile, "tm_glue_0=2.000 lm_0=-7.153\n", translation);
   }
-  
+
   private Translation decode(String input) {
     final Sentence sentence = new Sentence(input, 0, joshuaConfig);
     return decoder.decode(sentence);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index caeeeb3..44ef35d 100644
--- a/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -33,6 +33,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -40,15 +41,16 @@ import org.junit.Test;
  * TODO (fhieber): this test strangely only works with StateMinimizing KenLM.
  * This is to be investigated
  */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
 public class KBestExtractionTest {
-  
+
   private static final String CONFIG = "resources/kbest_extraction/joshua.config";
   private static final String INPUT = "a b c d e";
   private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
-  
+
   private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;
-  
+
   @Before
   public void setUp() throws Exception {
     joshuaConfig = new JoshuaConfiguration();
@@ -56,20 +58,20 @@ public class KBestExtractionTest {
     joshuaConfig.outputFormat = "%i ||| %s ||| %c";
     decoder = new Decoder(joshuaConfig, "");
   }
-  
+
   @After
   public void tearDown() throws Exception {
     decoder.cleanUp();
     decoder = null;
   }
-  
+
   @Test
   public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
     final String translation = decode(INPUT).toString();
     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
     assertEquals(gold, translation);
   }
-  
+
   private Translation decode(String input) {
     final Sentence sentence = new Sentence(input, 0, joshuaConfig);
     return decoder.decode(sentence);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
index 04078c6..a99338a 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -33,40 +33,42 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
  * Reimplements the constrained phrase decoding test
  */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
 public class ConstrainedPhraseDecodingTest {
-  
+
   private static final String CONFIG = "resources/phrase_decoder/constrained.config";
   private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
   private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
-  
+
   private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;
-  
+
   @Before
   public void setUp() throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(CONFIG);
     decoder = new Decoder(joshuaConfig, "");
   }
-  
+
   @After
   public void tearDown() throws Exception {
     decoder.cleanUp();
     decoder = null;
   }
-  
+
   @Test
   public void givenInput_whenConstrainedPhraseDecoding_thenOutputIsAsExpected() throws IOException {
     final String translation = decode(INPUT).toString();
     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
     assertEquals(gold, translation);
   }
-  
+
   private Translation decode(String input) {
     final Sentence sentence = new Sentence(input, 0, joshuaConfig);
     return decoder.decode(sentence);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 794ecd5..b5bd612 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -33,40 +33,42 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
  * Reimplements the constrained phrase decoding test
  */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
 public class PhraseDecodingTest {
-  
+
   private static final String CONFIG = "resources/phrase_decoder/config";
   private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
   private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
-  
+
   private JoshuaConfiguration joshuaConfig = null;
   private Decoder decoder = null;
-  
+
   @Before
   public void setUp() throws Exception {
     joshuaConfig = new JoshuaConfiguration();
     joshuaConfig.readConfigFile(CONFIG);
     decoder = new Decoder(joshuaConfig, "");
   }
-  
+
   @After
   public void tearDown() throws Exception {
     decoder.cleanUp();
     decoder = null;
   }
-  
+
   @Test
   public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
     final String translation = decode(INPUT).toString();
     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
     assertEquals(gold, translation);
   }
-  
+
   private Translation decode(String input) {
     final Sentence sentence = new Sentence(input, 0, joshuaConfig);
     return decoder.decode(sentence);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index d61e303..1f032d8 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -26,6 +26,7 @@ import org.apache.joshua.decoder.ff.lm.KenLM;
 
 import org.junit.After;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -34,6 +35,7 @@ import org.junit.Test;
  * If run in Eclipse, add -Djava.library.path=build/lib to JVM arguments
  * of the run configuration.
  */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
 public class KenLmTest {
 
   private static final String LANGUAGE_MODEL_PATH = "resources/kenlm/oilers.kenlm";

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/StructuredOutputTest.java b/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
index b8a2496..f5e9d34 100644
--- a/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@ -36,7 +36,7 @@ import org.junit.Assert;
  * a bunch of capital letters to lowercase letters. Rules in the test grammar
  * drop and generate additional words and simulate reordering of rules, so that
  * proper extraction of word alignments can be tested.
- * 
+ *
  * @author fhieber
  */
 public class StructuredOutputTest {
@@ -67,7 +67,7 @@ public class StructuredOutputTest {
     joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
     joshuaConfig.goal_symbol = "[GOAL]";
     joshuaConfig.default_non_terminal = "[X]";
-    joshuaConfig.features.add("feature_function = OOVPenalty");
+    joshuaConfig.features.add("OOVPenalty");
     joshuaConfig.weights.add("tm_pt_0 1");
     joshuaConfig.weights.add("tm_pt_1 1");
     joshuaConfig.weights.add("tm_pt_2 1");

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1743cba/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java b/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
index 69412e2..6718858 100644
--- a/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
+++ b/src/test/java/org/apache/joshua/system/StructuredTranslationTest.java
@@ -42,7 +42,7 @@ import org.junit.Test;
  * drop and generate additional words and simulate reordering of rules, so that
  * proper extraction of word alignments and other information from the decoder
  * can be tested.
- * 
+ *
  * @author fhieber
  */
 public class StructuredTranslationTest {
@@ -70,6 +70,7 @@ public class StructuredTranslationTest {
     EXPECTED_FEATURES.put("tm_pt_4", -3.0f);
     EXPECTED_FEATURES.put("tm_pt_5", -3.0f);
     EXPECTED_FEATURES.put("OOV", 7.0f);
+    EXPECTED_FEATURES.put("OOVPenalty", 0.0f);
   }
 
   @Before
@@ -109,30 +110,30 @@ public class StructuredTranslationTest {
     Sentence sentence = new Sentence(input, 0, joshuaConfig);
     return decoder.decode(sentence);
   }
-  
+
   @Test
   public void givenInput_whenRegularOutputFormat_thenExpectedOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = false;
     joshuaConfig.outputFormat = "%s | %a ";
-    
+
     // WHEN
     final String translation = decode(INPUT).toString().trim();
-    
+
     // THEN
     assertEquals(EXPECTED_TRANSLATION + " | " + EXPECTED_WORD_ALIGNMENT_STRING, translation);
   }
-  
+
   @Test
   public void givenInput_whenRegularOutputFormatWithTopN1_thenExpectedOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = false;
     joshuaConfig.outputFormat = "%s | %e | %a | %c";
     joshuaConfig.topN = 1;
-    
+
     // WHEN
     final String translation = decode(INPUT).toString().trim();
-    
+
     // THEN
     assertEquals(EXPECTED_TRANSLATION + " | " + INPUT + " | " + EXPECTED_WORD_ALIGNMENT_STRING + String.format(" | %.3f", EXPECTED_SCORE),
         translation);
@@ -143,7 +144,7 @@ public class StructuredTranslationTest {
     // GIVEN
     joshuaConfig.use_structured_output = true;
     joshuaConfig.topN = 0;
-    
+
     // WHEN
     final Translation translation = decode(INPUT);
     final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -152,7 +153,7 @@ public class StructuredTranslationTest {
     final float translationScore = structuredTranslation.getTranslationScore();
     final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
     final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
-    
+
     // THEN
     assertTrue(translation.getStructuredTranslations().size() == 1);
     assertEquals(EXPECTED_TRANSLATION, translationString);
@@ -162,13 +163,13 @@ public class StructuredTranslationTest {
     assertEquals(wordAlignment.size(), translatedTokens.size());
     assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
   }
-  
+
   @Test
   public void givenInput_whenStructuredOutputFormatWithTopN1_thenExpectedOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = true;
     joshuaConfig.topN = 1;
-    
+
     // WHEN
     final Translation translation = decode(INPUT);
     final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
@@ -178,7 +179,7 @@ public class StructuredTranslationTest {
     final float translationScore = structuredTranslation.getTranslationScore();
     final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
     final Map<String,Float> translationFeatures = structuredTranslation.getTranslationFeatures();
-    
+
     // THEN
     assertTrue(structuredTranslations.size() == 1);
     assertEquals(EXPECTED_TRANSLATION, translationString);
@@ -188,19 +189,19 @@ public class StructuredTranslationTest {
     assertEquals(wordAlignment.size(), translatedTokens.size());
     assertEquals(EXPECTED_FEATURES.entrySet(), translationFeatures.entrySet());
   }
-  
+
   @Test
   public void givenInput_whenStructuredOutputFormatWithKBest_thenExpectedOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = true;
     joshuaConfig.topN = 100;
-    
+
     // WHEN
     final Translation translation = decode(INPUT);
     final List<StructuredTranslation> structuredTranslations = translation.getStructuredTranslations();
     final StructuredTranslation viterbiTranslation = structuredTranslations.get(0);
     final StructuredTranslation lastKBest = structuredTranslations.get(structuredTranslations.size() - 1);
-    
+
     // THEN
     assertEquals(structuredTranslations.size(), EXPECTED_NBEST_LIST_SIZE);
     assertTrue(structuredTranslations.size() > 1);
@@ -212,14 +213,14 @@ public class StructuredTranslationTest {
     // last entry in KBEST is all input words untranslated, should have 8 OOVs.
     assertEquals(INPUT, lastKBest.getTranslationString());
     assertEquals(-800.0, lastKBest.getTranslationFeatures().get("OOVPenalty"), 0.0001);
-    
+
   }
-  
+
   @Test
   public void givenEmptyInput_whenStructuredOutputFormat_thenEmptyOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = true;
-    
+
     // WHEN
     final Translation translation = decode("");
     final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -227,20 +228,20 @@ public class StructuredTranslationTest {
     final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
     final float translationScore = structuredTranslation.getTranslationScore();
     final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
-    
+
     // THEN
     assertEquals("", translationString);
     assertTrue(translatedTokens.isEmpty());
     assertEquals(0, translationScore, 0.00001);
     assertTrue(wordAlignment.isEmpty());
   }
-  
+
   @Test
   public void givenOOVInput_whenStructuredOutputFormat_thenOOVOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = true;
     final String input = "gabarbl";
-    
+
     // WHEN
     final Translation translation = decode(input);
     final StructuredTranslation structuredTranslation = translation.getStructuredTranslations().get(0);
@@ -248,23 +249,24 @@ public class StructuredTranslationTest {
     final List<String> translatedTokens = structuredTranslation.getTranslationTokens();
     final float translationScore = structuredTranslation.getTranslationScore();
     final List<List<Integer>> wordAlignment = structuredTranslation.getTranslationWordAlignments();
-    
+
     // THEN
     assertEquals(input, translationString);
     assertTrue(translatedTokens.contains(input));
     assertEquals(-99.0, translationScore, 0.00001);
     assertTrue(wordAlignment.contains(asList(0)));
   }
-  
+
   @Test
   public void givenEmptyInput_whenRegularOutputFormat_thenNewlineOutput() {
     // GIVEN
     joshuaConfig.use_structured_output = false;
-    
+    joshuaConfig.outputFormat = "%s";
+
     // WHEN
     final Translation translation = decode("");
     final String translationString = translation.toString();
-    
+
     // THEN
     assertEquals("\n", translationString);
   }

[2/2] incubator-joshua git commit: Merge branch 'junit'

Posted by mj...@apache.org.

Merge branch 'junit'


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/35e9c58f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/35e9c58f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/35e9c58f

Branch: refs/heads/master
Commit: 35e9c58ffead1b71de8cde5f6a3b74804e58b118
Parents: c4ce122 b1743cb
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Jun 2 09:27:42 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Jun 2 09:27:42 2016 -0400

----------------------------------------------------------------------
 pom.xml                                         | 12 ++-
 .../apache/joshua/decoder/ff/FeatureVector.java | 91 +++++++++++---------
 .../decoder/ff/lm/LanguageModelFFTest.java      | 28 +++---
 .../lm/berkeley_lm/LMGrammarBerkeleyTest.java   | 21 ++---
 .../kbest_extraction/KBestExtractionTest.java   | 14 +--
 .../ConstrainedPhraseDecodingTest.java          | 14 +--
 .../phrase/decode/PhraseDecodingTest.java       | 14 +--
 .../org/apache/joshua/system/KenLmTest.java     |  2 +
 .../joshua/system/StructuredOutputTest.java     |  4 +-
 .../system/StructuredTranslationTest.java       | 52 +++++------
 10 files changed, 136 insertions(+), 116 deletions(-)
----------------------------------------------------------------------