You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/20 02:43:25 UTC

[01/15] incubator-joshua git commit: large commit converting phrase-based decoding to new rule format

Repository: incubator-joshua
Updated Branches:
  refs/heads/JOSHUA-284 [created] 16d5647be


large commit converting phrase-based decoding to new rule format

Not working yet, but much of the code is redone and future estimates are being computed correctly


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dcc7e7ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dcc7e7ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dcc7e7ee

Branch: refs/heads/JOSHUA-284
Commit: dcc7e7ee72228de08b70003a49344c2614eaedbe
Parents: fcaf0bf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:13:06 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:13:06 2016 -0400

----------------------------------------------------------------------
 .gitignore                                      |   1 +
 .../decoder/ff/tm/format/MosesFormatReader.java |  13 +--
 .../apache/joshua/decoder/phrase/Candidate.java | 103 +++++++++++++------
 .../apache/joshua/decoder/phrase/Future.java    |   9 +-
 .../apache/joshua/decoder/phrase/Header.java    |  87 ----------------
 .../joshua/decoder/phrase/Hypothesis.java       |  48 ++++++---
 .../joshua/decoder/phrase/PhraseTable.java      |   8 +-
 .../org/apache/joshua/decoder/phrase/Stack.java |  15 ++-
 .../apache/joshua/decoder/phrase/Stacks.java    |  18 +++-
 .../joshua/decoder/phrase/TargetPhrases.java    |   1 +
 .../org/apache/joshua/tools/GrammarPacker.java  |   8 +-
 11 files changed, 136 insertions(+), 175 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index d3d311e..0d42974 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@ build
 .classpath
 /target/
 .project
+/doc/

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 7811b3b..cdf2170 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -63,22 +63,15 @@ public class MosesFormatReader extends HieroFormatReader {
    *    
    * becomes
    * 
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3  ||| 0-1 1-0
+   *    [X] ||| mots francaises ||| French words ||| 1 2 3  ||| 0-1 1-0
    *    
-   * For thrax-extracted phrasal grammars, it transforms
-   * 
-   *    [X] ||| mots francaises ||| French words ||| 1 2 3 ||| 0-1 1-0
-   *
-   * into
-   * 
-   *    [X] ||| [X,1] mots francaises ||| [X,1] French words ||| 1 2 3 ||| 0-1 1-0
+   * For thrax-extracted phrasal grammars, no transformation is needed.
    */
   @Override
   public Rule parseLine(String line) {
     String[] fields = line.split(Constants.fieldDelimiter);
     
-    String nt = FormatUtils.cleanNonTerminal(Constants.defaultNT);
-    StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| [" + nt + ",1] " + fields[0] + " ||| [" + nt + ",1] " + fields[1] + " |||");
+    StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| " + fields[0] + " ||| " + fields[1] + " |||");
 
     String mosesFeatureString = fields[2];
     for (String value: mosesFeatureString.split(" ")) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index ee8a2a9..2abe560 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -19,9 +19,17 @@
 package org.apache.joshua.decoder.phrase;
 
 /*** 
- * A candidate is basically a cube prune state. It contains a list of hypotheses and target
- * phrases, and an instantiated candidate is a pair of indices that index these two lists. This
- * is the "cube prune" position.
+ * A candidate represents a translation hypothesis that may be added to the translation
+ * hypergraph. It groups together (a) a set of translation hypotheses all having the same coverage
+ * vector and (b) a set of compatible phrase extensions that all cover the same source span. A 
+ * Candidate object therefore denotes one specific coverage vector. When a Candidate is
+ * instantiated, it has values in ranks[] that are indices into these two lists representing
+ * the current cube prune state.
+ * 
+ * For any particular (previous hypothesis) x (translation option) combination (a selection from
+ * both lists), there is no guarantee about whether this is a (m)onotonic, (s)wap, or (d)iscontinuous
+ * rule application. This must be inferred from the span (recording the portion of the input being
+ * translated) and the last index of the previous hypothesis under consideration.
  */
 
 import java.util.ArrayList;
@@ -30,27 +38,41 @@ import java.util.List;
 
 import org.apache.joshua.corpus.Span;
 import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
+import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.segment_file.Sentence;
 
 public class Candidate {
-
+  
+  private List<FeatureFunction> featureFunctions;
+  private Sentence sentence;
+  
+  // source span of new phrase
+  public Span span;
+  
   // the set of hypotheses that can be paired with phrases from this span 
   private List<Hypothesis> hypotheses;
 
   // the list of target phrases gathered from a span of the input
   private TargetPhrases phrases;
-
-  // source span of new phrase
-  public Span span;
   
   // future cost of applying phrases to hypotheses
-  float future_delta;
+  private float future_delta;
   
   // indices into the hypotheses and phrases arrays (used for cube pruning)
   private int[] ranks;
   
+  // the reordering rule used by an instantiated Candidate
+  private Rule rule;
+  
+  // the HGNode built over the current target side phrase
+  private HGNode phraseNode;
+  
+  // the cost of the current configuration
+  private ComputeNodeResult computedResult;
+  
   // scoring and state information 
   private ComputeNodeResult result;
   
@@ -96,22 +118,27 @@ public class Candidate {
         ranks[0], hypotheses.size(), ranks[1], phrases.size(),
         getHypothesis(), getRule().getEnglishWords().replaceAll("\\[.*?\\] ",""), getSpan());
   }
-  
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta) {
-    this.hypotheses = hypotheses;
-    this.phrases = phrases;
-    this.span = span;
-    this.future_delta = delta;
-    this.ranks = new int[] { 0, 0 };
-  }
 
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+  public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
+      List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
     this.hypotheses = hypotheses;
     this.phrases = phrases;
     this.span = span;
     this.future_delta = delta;
     this.ranks = ranks;
+    this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
 //    this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
+    this.phraseNode = null;
+  }
+  
+  /**
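+   * Determines whether extending the currently selected previous hypothesis with the currently
+   * selected phrase is a straight (monotonic) rather than an inverted rule application.
+   * 
+   * @return true if the hypothesis' last source index is less than the start of the span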
+   */
+  private boolean isMonotonic() {
+    return getHypothesis().getLastSourceIndex() < span.start;
   }
   
   /**
@@ -131,7 +158,7 @@ public class Candidate {
    */
   public Candidate extendHypothesis() {
     if (ranks[0] < hypotheses.size() - 1) {
-      return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
     }
     return null;
   }
@@ -143,7 +170,7 @@ public class Candidate {
    */
   public Candidate extendPhrase() {
     if (ranks[1] < phrases.size() - 1) {
-      return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
     }
     
     return null;
@@ -170,13 +197,24 @@ public class Candidate {
   }
   
   /**
-   * This returns the target side {@link org.apache.joshua.corpus.Phrase}, which is a {@link org.apache.joshua.decoder.ff.tm.Rule} object. This is just a
-   * convenience function that works by returning the phrase indexed in ranks[1].
+   * This returns a new HGNode representing the phrase being added, i.e., a terminal
+   * production in the hypergraph. The score and DP state are computed only here on demand.
+   * 
+   * @return a new hypergraph node representing the phrase translation
+   */
+  public HGNode getPhraseNode() {
+    ComputeNodeResult result = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
+    phraseNode = new HGNode(-1, span.end, rule.getLHS(), result.getDPStates(), null, result.getPruningEstimate());
+    return phraseNode;
+  }
+    
+  /**
+   * This returns the rule being applied (straight or inverted)
    * 
    * @return the phrase at position ranks[1]
    */
   public Rule getRule() {
-    return phrases.get(ranks[1]);
+    return this.rule;
   }
   
   /**
@@ -187,7 +225,13 @@ public class Candidate {
    */
   public List<HGNode> getTailNodes() {
     List<HGNode> tailNodes = new ArrayList<HGNode>();
-    tailNodes.add(getHypothesis());
+    if (isMonotonic()) {
+      tailNodes.add(getHypothesis());
+      tailNodes.add(getPhraseNode());
+    } else {
+      tailNodes.add(getPhraseNode());
+      tailNodes.add(getHypothesis());
+    }
     return tailNodes;
   }
   
@@ -202,13 +246,8 @@ public class Candidate {
     return cov;
   }
 
-  /**
-   * Sets the result of a candidate (TODO should just be moved to the constructor).
-   * 
-   * @param result todo
-   */
-  public void setResult(ComputeNodeResult result) {
-    this.result = result;
+  public ComputeNodeResult getResult() {
+    return computedResult;
   }
 
   /**
@@ -234,8 +273,4 @@ public class Candidate {
   public List<DPState> getStates() {
     return result.getDPStates();
   }
-
-  public ComputeNodeResult getResult() {
-    return result;
-  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 0ece4a3..572aa64 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -43,7 +43,7 @@ public class Future {
 
     sentlen = chart.SentenceLength();
     entries = new ChartSpan<Float>(sentlen + 1, Float.NEGATIVE_INFINITY);
-
+    
     /*
      * The sentence is represented as a sequence of words, with the first and last words set
      * to <s> and </s>. We start indexing at 1 because the first word (<s>) is always covered.
@@ -68,7 +68,7 @@ public class Future {
 
     // All the phrases are in, now do minimum dynamic programming.  Lengths 0 and 1 were already handled above.
     for (int length = 2; length <= chart.SentenceLength(); length++) {
-      for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
+      for (int begin = 1; begin < chart.SentenceLength() - length; begin++) {
         for (int division = begin + 1; division < begin + length; division++) {
           setEntry(begin, begin + length, Math.max(getEntry(begin, begin + length), getEntry(begin, division) + getEntry(division, begin + length)));
         }
@@ -106,14 +106,13 @@ public class Future {
 
   private float getEntry(int begin, int end) {
     assert end >= begin;
-    assert end < this.sentlen;
+    assert end <= this.sentlen;
     return entries.get(begin, end);
   }
 
   private void setEntry(int begin, int end, float value) {
     assert end >= begin;
-    assert end < this.sentlen;
-    //    System.err.println(String.format("future cost from %d to %d is %.5f", begin, end, value));
+    assert end <= this.sentlen;
     entries.set(begin, end, value);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Header.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Header.java b/src/main/java/org/apache/joshua/decoder/phrase/Header.java
deleted file mode 100644
index 30d771c..0000000
--- a/src/main/java/org/apache/joshua/decoder/phrase/Header.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.phrase;
-
-// PORT: done
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Comparator;
-
-public class Header implements Comparable<Header>, Comparator<Header> {
-
-  private static final Logger LOG = LoggerFactory.getLogger(Header.class);
-
-  private float score;
-  private int arity;
-  private Note note;
-    
-  protected Header() {
-    score = 0.0f;
-    arity = 0;
-    note = null;
-  }
-  
-  protected Header(Header other) {
-    this.score = other.GetScore();
-    this.arity = other.GetArity();
-    this.note = other.GetNote();
-  }
-  
-  protected Header(int arity) {
-    this.score = 0.0f;
-    this.arity = arity;
-    this.note = new Note();
-  }
-  
-  public boolean Valid() {
-    // C++: return base_;
-    LOG.debug("Header::Valid(): {}", (note != null));
-    return note != null;
-  }
-  
-  public float GetScore() {
-    return score;
-  }
-  
-  public void SetScore(float score) {
-    this.score = score;
-  }
-
-  public int GetArity() { return arity; }
-  
-  public Note GetNote() { return note; }
-  
-  public void SetNote(Note note) { this.note = note; }
-
-  @Override
-  public int compareTo(Header other) {
-    if (this.GetScore() < other.GetScore())
-      return -1;
-    else if (this.GetScore() > other.GetScore())
-      return 1;
-    return 0;
-  }
-  
-  @Override
-  public int compare(Header arg0, Header arg1) {
-    return arg0.compareTo(arg1);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 71d3df9..f87b728 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -18,9 +18,8 @@
  */
 package org.apache.joshua.decoder.phrase;
 
-import java.util.List;	
+import java.util.List;
 
-import org.apache.joshua.corpus.Vocabulary;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
@@ -28,10 +27,12 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
 import org.apache.joshua.decoder.hypergraph.HyperEdge;
 
 /**
- * Represents a hypothesis, a translation of some coverage of the input. Extends {@link org.apache.joshua.decoder.hypergraph.HGNode}, 
- * through a bit of a hack. Whereas (i,j) represents the span of an {@link org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
- * and j is overloaded to denote the span of the phrase being applied. The complete coverage vector 
- * can be obtained by looking at the tail pointer and casting it.
+ * Represents a hypothesis, a translation of some subset of the input sentence. Extends 
+ * {@link org.apache.joshua.decoder.hypergraph.HGNode}, through a bit of a hack. Whereas (i,j) 
+ * represents the span of an {@link org.apache.joshua.decoder.hypergraph.HGNode}, i here is not used,
+ * and j is overloaded to denote the index into the source string of the end of the last phrase that 
+ * was applied. The complete coverage vector can be obtained by looking at the tail pointer and 
+ * casting it.
  * 
  * @author Kenneth Heafield
  * @author Matt Post post@cs.jhu.edu
@@ -41,9 +42,11 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
   // The hypothesis' coverage vector
   private Coverage coverage;
 
-  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> |||   ||| 0-0");
-  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> |||   ||| 0-0 1-1");
-
+  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> |||   ||| 0-0");
+  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> |||   ||| 0-0 1-1");
+  public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
+  public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
+  
   public String toString() {
     StringBuffer sb = new StringBuffer();
     for (DPState state: getDPStates())
@@ -55,18 +58,25 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
 
   // Initialize root hypothesis. Provide the LM's BeginSentence.
   public Hypothesis(List<DPState> states, float futureCost) {
-    super(0, 1, Vocabulary.id("[X]"), states,
+    super(0, 1, BEGIN_RULE.getLHS(), states,
         new HyperEdge(BEGIN_RULE, 0.0f, 0.0f, null, null), futureCost);
     this.coverage = new Coverage(1);
   }
 
+  /**
+   * This creates a hypothesis from a Candidate object
+   * 
+   * @param cand the candidate
+   */
   public Hypothesis(Candidate cand) {
     // TODO: sourcepath
-    super(-1, cand.span.end, Vocabulary.id("[X]"), cand.getStates(), new HyperEdge(
-        cand.getRule(), cand.getResult().getViterbiCost(), cand.getResult().getTransitionCost(),
-        cand.getTailNodes(), null), cand.score());
+    super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(), 
+        new HyperEdge(cand.getRule(), cand.getResult().getViterbiCost(), 
+            cand.getResult().getTransitionCost(),
+            cand.getTailNodes(), null), cand.score());
     this.coverage = cand.getCoverage();
   }
+
   
   // Extend a previous hypothesis.
   public Hypothesis(List<DPState> states, float score, Hypothesis previous, int source_end, Rule target) {
@@ -74,6 +84,10 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
     this.coverage = previous.coverage;
   }
 
+  public Hypothesis(int lastSourceIndex, int lhs, List<DPState> states) {
+    super(-1, lastSourceIndex, lhs, states, null, 0.0f);
+  }
+
   public Coverage getCoverage() {
     return coverage;
   }
@@ -86,16 +100,16 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
    * HGNodes (designed for chart parsing) maintain a span (i,j). We overload j
    * here to record the index of the last translated source word.
    * 
-   * @return the int 'j' which is overloaded to denote the span of the phrase being applied
+   * @return the index of the last translated source word
    */
-  public int LastSourceIndex() {
+  public int getLastSourceIndex() {
     return j;
   }
 
   @Override
   public int hashCode() {
     int hash = 0;
-    hash = 31 * LastSourceIndex() + 19 * getCoverage().hashCode();
+    hash = 31 * getLastSourceIndex() + 19 * getCoverage().hashCode();
     if (null != dpStates && dpStates.size() > 0)
       for (DPState dps: dpStates)
         hash *= 57 + dps.hashCode();
@@ -112,7 +126,7 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
     if (obj instanceof Hypothesis) {
       Hypothesis other = (Hypothesis) obj;
 
-      if (LastSourceIndex() != other.LastSourceIndex() || ! getCoverage().equals(other.getCoverage()))
+      if (getLastSourceIndex() != other.getLastSourceIndex() || ! getCoverage().equals(other.getCoverage()))
         return false;
       
       if (dpStates == null)

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 312781f..6b237a9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -18,8 +18,6 @@
  */
 package org.apache.joshua.decoder.phrase;
 
-import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
-
 import java.io.File;
 import java.io.IOException;
 import java.util.List;
@@ -80,14 +78,13 @@ public class PhraseTable implements Grammar {
   }
       
   /**
-   * Returns the longest source phrase read. Because phrases have a dummy nonterminal prepended to
-   * them, we need to subtract 1.
+   * Returns the longest source phrase read.
    * 
    * @return the longest source phrase read.
    */
   @Override
   public int getMaxSourcePhraseLength() {
-    return this.backend.getMaxSourcePhraseLength() - 1;
+    return this.backend.getMaxSourcePhraseLength();
   }
 
   /**
@@ -99,7 +96,6 @@ public class PhraseTable implements Grammar {
   public RuleCollection getPhrases(int[] sourceWords) {
     if (sourceWords.length != 0) {
       Trie pointer = getTrieRoot();
-      pointer = pointer.match(Vocabulary.id("[X]"));
       int i = 0;
       while (pointer != null && i < sourceWords.length)
         pointer = pointer.match(sourceWords[i++]);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index d0ae2da..ad24a51 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -22,13 +22,10 @@ import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.List;
 import java.util.PriorityQueue;
 import java.util.Set;
 
 import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.chart_parser.ComputeNodeResult;
-import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.segment_file.Sentence;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -46,7 +43,6 @@ public class Stack extends ArrayList<Hypothesis> {
   private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
   
   private Sentence sentence;
-  private List<FeatureFunction> featureFunctions;
   private JoshuaConfiguration config;
 
   /* The list of states we've already visited. */
@@ -65,8 +61,7 @@ public class Stack extends ArrayList<Hypothesis> {
    * @param sentence input for a {@link org.apache.joshua.lattice.Lattice}
    * @param config populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
    */
-  public Stack(List<FeatureFunction> featureFunctions, Sentence sentence, JoshuaConfiguration config) {
-    this.featureFunctions = featureFunctions;
+  public Stack(Sentence sentence, JoshuaConfiguration config) {
     this.sentence = sentence;
     this.config = config;
     
@@ -149,6 +144,9 @@ public class Stack extends ArrayList<Hypothesis> {
 
     // Constrained decoding
     if (sentence.target() != null) {
+      throw new RuntimeException("* FATAL! Constrained decoding no longer works for the new phrase format");
+      // TODO: fix constrained decoding
+      /*
       String oldWords = cand.getHypothesis().bestHyperedge.getRule().getEnglishWords().replace("[X,1] ",  "");
       String newWords = cand.getRule().getEnglishWords().replace("[X,1] ",  "");
           
@@ -159,12 +157,10 @@ public class Stack extends ArrayList<Hypothesis> {
           addCandidate(next); 
         return;
       }
+      */
     }
 
     // TODO: sourcepath
-    ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, cand.getRule(),
-        cand.getTailNodes(), -1, cand.getSpan().end, null, this.sentence);
-    cand.setResult(result);
     
     candidates.add(cand);
   }
@@ -199,6 +195,7 @@ public class Stack extends ArrayList<Hypothesis> {
   /**
    * Adds a popped candidate to the chart / main stack. This is a candidate we have decided to
    * keep around.
+   * 
    * @param complete a completely-initialized translation {@link org.apache.joshua.decoder.phrase.Candidate}
    * 
    */

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 8c092ec..dc1a692 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -126,13 +126,13 @@ public class Stacks {
     // Initialize root hypothesis with <s> context and future cost for everything.
     ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, Hypothesis.BEGIN_RULE,
         null, -1, 1, null, this.sentence);
-    Stack firstStack = new Stack(featureFunctions, sentence, config);
+    Stack firstStack = new Stack(sentence, config);
     firstStack.add(new Hypothesis(result.getDPStates(), future.Full()));
     stacks.add(firstStack);
     
     // Decode with increasing numbers of source words. 
     for (int source_words = 2; source_words <= sentence.length(); ++source_words) {
-      Stack targetStack = new Stack(featureFunctions, sentence, config);
+      Stack targetStack = new Stack(sentence, config);
       stacks.add(targetStack);
 
       // Iterate over stacks to continue from.
@@ -144,7 +144,13 @@ public class Stacks {
         LOG.debug("WORDS {} MAX {} (STACK {} phrase_length {})", source_words,
             chart.MaxSourcePhraseLength(), from_stack, phrase_length);
         
-        // Iterate over antecedents in this stack.
+        /* Each from stack groups together lots of different coverage vectors that all cover the
+         * same number of words. We have the number of covered words from from_stack, and the length
+         * of the phrases we are going to add from (source_words - from_stack). We now iterate over
+         * all coverage vectors, finding the set of phrases that can extend each of them, given
+         * the two constraints: the phrase length, and the current coverage vector. These will all
+         * be grouped under the same target stack.
+         */
         for (Coverage coverage: tailStack.getCoverages()) {
           ArrayList<Hypothesis> hypotheses = tailStack.get(coverage); 
           
@@ -161,6 +167,9 @@ public class Stacks {
               continue;
             }
 
+            /* We have found a permissible phrase start point and length, that fits with the current
+             * coverage vector. Record that in a Span.
+             */
             Span span = new Span(begin, begin + phrase_length);
 
             // Don't append </s> until the end
@@ -171,7 +180,6 @@ public class Stacks {
             if (phrases == null)
               continue;
 
-
             LOG.debug("Applying {} target phrases over [{}, {}]",
                 phrases.size(), begin, begin + phrase_length);
             
@@ -185,7 +193,7 @@ public class Stacks {
              * phrases from that span. The hypotheses are wrapped in HypoState objects, which
              * augment the hypothesis score with a future cost.
              */
-            Candidate cand = new Candidate(hypotheses, phrases, span, future_delta);
+            Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] {0, 0});
             targetStack.addCandidate(cand);
           }
         }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index 05a4b0a..ed1d577 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -59,6 +59,7 @@ public class TargetPhrases extends ArrayList<Rule> {
    * Score the rules and sort them. Scoring is necessary because rules are only scored if they
    * are used, in an effort to make reading in rules more efficient. This is starting to create
    * some trouble and should probably be reworked.
+   * 
    * @param features a {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
    * @param weights a populated {@link org.apache.joshua.decoder.ff.FeatureVector}
    * @param num_options the number of options

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dcc7e7ee/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/tools/GrammarPacker.java b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index b9208d2..b39b775 100644
--- a/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@ -61,9 +61,13 @@ public class GrammarPacker {
    * table packing that packed phrases without the [X,1] on the source and target sides, which
    * then required special handling in the decoder to use for phrase-based decoding.
    * 
-   * 
+   * - 4 (August 2016). Phrase-based decoding rewritten to represent phrases without a builtin
+   * nonterminal. Instead, cost-less glue rules are used in phrase-based decoding. This eliminates
+   * the need for special handling of phrase grammars (except for having to add a LHS), and lets
+   * phrase grammars be used in both hierarchical and phrase-based decoding without conversion.
+   *
    */
-  public static final int VERSION = 3;
+  public static final int VERSION = 4;
   
   // Size limit for slice in bytes.
   private static int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);


[07/15] incubator-joshua git commit: changed order of assert() args

Posted by mj...@apache.org.
changed order of assert() args


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2e746c18
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2e746c18
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2e746c18

Branch: refs/heads/JOSHUA-284
Commit: 2e746c1864ca7eb6be27f2fca3ab258c9ebe7adb
Parents: 1022699
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:14:18 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:14:18 2016 -0500

----------------------------------------------------------------------
 .../apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2e746c18/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index 9e9a415..e81b0de 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -64,7 +64,7 @@ public class PhraseDecodingTest {
   public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
     final String translation = decode(INPUT).toString();
     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
-    assertEquals(gold, translation);
+    assertEquals(translation, gold);
   }
 
   private Translation decode(String input) {


[12/15] incubator-joshua git commit: converted from span -> separate i, j

Posted by mj...@apache.org.
converted from span -> separate i, j


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/eb002238
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/eb002238
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/eb002238

Branch: refs/heads/JOSHUA-284
Commit: eb00223870c7683cf8e557ab689a1979fb36ec1d
Parents: 5719c8c
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:43:58 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:43:58 2016 -0500

----------------------------------------------------------------------
 .../apache/joshua/decoder/phrase/Candidate.java | 33 +++++++++++---------
 .../apache/joshua/decoder/phrase/Future.java    |  6 ++--
 .../joshua/decoder/phrase/Hypothesis.java       |  6 +---
 .../joshua/decoder/phrase/PhraseChart.java      |  5 ++-
 .../apache/joshua/decoder/phrase/Stacks.java    |  4 ++-
 .../joshua/decoder/phrase/TargetPhrases.java    |  8 ++++-
 6 files changed, 36 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index bd581e3..cb9cd6d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -50,9 +50,6 @@ public class Candidate {
   private List<FeatureFunction> featureFunctions;
   private Sentence sentence;
   
-  // source span of new phrase
-  public Span span;
-  
   // the set of hypotheses that can be paired with phrases from this span 
   private List<Hypothesis> hypotheses;
 
@@ -92,7 +89,7 @@ public class Candidate {
   public boolean equals(Object obj) {
     if (obj instanceof Candidate) {
       Candidate other = (Candidate) obj;
-      if (hypotheses != other.hypotheses || phrases != other.phrases || span != other.span)
+      if (hypotheses != other.hypotheses || phrases != other.phrases)
         return false;
       
       if (ranks.length != other.ranks.length)
@@ -111,7 +108,6 @@ public class Candidate {
   public int hashCode() {
     return 17 * hypotheses.size() 
         + 23 * phrases.size() 
-        + 57 * span.hashCode() 
         + 117 * Arrays.hashCode(ranks);
 //    return hypotheses.hashCode() * phrases.hashCode() * span.hashCode() * Arrays.hashCode(ranks);
   }
@@ -124,12 +120,11 @@ public class Candidate {
   }
 
   public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
-      List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+      List<Hypothesis> hypotheses, TargetPhrases phrases, float delta, int[] ranks) {
     this.featureFunctions = featureFunctions;
     this.sentence = sentence;
     this.hypotheses = hypotheses;
     this.phrases = phrases;
-    this.span = span;
     this.future_delta = delta;
     this.ranks = ranks;
     this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.SWAP_RULE;
@@ -148,7 +143,9 @@ public class Candidate {
    * @return
    */
   private boolean isMonotonic() {
-    return getHypothesis().getLastSourceIndex() < span.start;
+//    System.err.println(String.format("isMonotonic(); %d < %d -> %s", 
+//        getLastCovered(), getPhraseEnd(), getLastCovered() < getPhraseEnd()));
+    return getLastCovered() < getPhraseEnd();
   }
   
   /**
@@ -168,7 +165,7 @@ public class Candidate {
    */
   public Candidate extendHypothesis() {
     if (ranks[0] < hypotheses.size() - 1) {
-      return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0] + 1, ranks[1] });
     }
     return null;
   }
@@ -180,7 +177,7 @@ public class Candidate {
    */
   public Candidate extendPhrase() {
     if (ranks[1] < phrases.size() - 1) {
-      return new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
+      return new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] { ranks[0], ranks[1] + 1 });
     }
     
     return null;
@@ -192,7 +189,7 @@ public class Candidate {
    * @return the span object
    */
   public Span getSpan() {
-    return this.span;
+    return new Span(this.phrases.i, this.phrases.j);
   }
   
   /**
@@ -236,13 +233,13 @@ public class Candidate {
   public ComputeNodeResult computeResult() {
     if (computedResult == null) {
       // add the phrase node
-      ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, span.start, span.end, null, sentence);
+      ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, phrases.i, phrases.j, null, sentence);
       HyperEdge edge = new HyperEdge(getPhraseRule(), phraseResult.getViterbiCost(), phraseResult.getTransitionCost(), null, null);
-      phraseNode = new HGNode(-1, span.end, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
+      phraseNode = new HGNode(phrases.i, phrases.j, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
 
       // add the rule
       // TODO: sourcepath
-      computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), -1, span.end, null, sentence);
+      computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), getLastCovered(), getPhraseEnd(), null, sentence);
     }
     
     return computedResult;
@@ -315,4 +312,12 @@ public class Candidate {
   public List<DPState> getStates() {
     return computeResult().getDPStates();
   }
+  
+  public int getLastCovered() {
+    return getHypothesis().getLastSourceIndex();
+  }
+  
+  public int getPhraseEnd() {
+    return phrases.j;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 572aa64..c411ccb 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -99,8 +99,9 @@ public class Future {
   public float Change(Coverage coverage, int begin, int end) {
     int left = coverage.leftOpening(begin);
     int right = coverage.rightOpening(end, sentlen);
-    //    System.err.println(String.format("Future::Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f", coverage, begin, end, left, right,
-    //        Entry(left, begin), Entry(end, right), Entry(left, right)));
+//        System.err.println(String.format("Future.Change(%s, %d, %d) left %d right %d %.3f %.3f %.3f", 
+//            coverage, begin, end, left, right,
+//            getEntry(left, begin), getEntry(end, right), getEntry(left, right)));
     return getEntry(left, begin) + getEntry(end, right) - getEntry(left, right);
   }
 
@@ -113,6 +114,7 @@ public class Future {
   private void setEntry(int begin, int end, float value) {
     assert end >= begin;
     assert end <= this.sentlen;
+//    System.err.println(String.format("Future.setEntry(%d, %d) = %f", begin, end, value));
     entries.set(begin, end, value);
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 132d62d..8ef5597 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -70,7 +70,7 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
    */
   public Hypothesis(Candidate cand) {
     // TODO: sourcepath
-    super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(), 
+    super(cand.getLastCovered(), cand.getPhraseEnd(), cand.getRule().getLHS(), cand.getStates(), 
         new HyperEdge(cand.getRule(), cand.computeResult().getViterbiCost(), 
             cand.computeResult().getTransitionCost(),
             cand.getTailNodes(), null), cand.score());
@@ -84,10 +84,6 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
     this.coverage = previous.coverage;
   }
 
-  public Hypothesis(int lastSourceIndex, int lhs, List<DPState> states) {
-    super(-1, lastSourceIndex, lhs, states, null, 0.0f);
-  }
-
   public Coverage getCoverage() {
     return coverage;
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
index 9803d9b..dd5872f 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
@@ -185,9 +185,8 @@ public class PhraseChart {
       try {
         int offset = offset(begin, end);
         if (entries.get(offset) == null)
-          entries.set(offset, new TargetPhrases(rules));
-        else
-          entries.get(offset).addAll(rules);
+          entries.set(offset, new TargetPhrases(begin, end));
+        entries.get(offset).addAll(rules);
       } catch (java.lang.IndexOutOfBoundsException e) {
         LOG.error("Whoops! {} [{}-{}] too long ({})", to, begin, end, entries.size());
         LOG.error(e.getMessage(), e);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index dc1a692..c642226 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -193,7 +193,7 @@ public class Stacks {
              * phrases from that span. The hypotheses are wrapped in HypoState objects, which
              * augment the hypothesis score with a future cost.
              */
-            Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, span, future_delta, new int[] {0, 0});
+            Candidate cand = new Candidate(featureFunctions, sentence, hypotheses, phrases, future_delta, new int[] {0, 0});
             targetStack.addCandidate(cand);
           }
         }
@@ -267,6 +267,8 @@ public class Stacks {
       
       float finalTransitionScore = ComputeNodeResult.computeFinalCost(featureFunctions, tailNodes, 0, sentence.length(), null, sentence);
 
+      System.err.println(String.format("createGoalNode: final score: %f -> %f", score, finalTransitionScore));
+      
       if (null == this.end)
         this.end = new Hypothesis(null, score + finalTransitionScore, hyp, sentence.length(), null);
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/eb002238/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
index ed1d577..5692d1a 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/TargetPhrases.java
@@ -37,9 +37,15 @@ import org.apache.joshua.decoder.ff.tm.Rule;
 public class TargetPhrases extends ArrayList<Rule> {
 
   private static final long serialVersionUID = 1L;
+  
+  public int i = -2;
+  public int j = -2;
 
-  public TargetPhrases() {
+  public TargetPhrases(int i, int j) {
     super();
+    
+    this.i = i;
+    this.j = j;
   }
   
   /**


[11/15] incubator-joshua git commit: fixed distortion computation to work with new format

Posted by mj...@apache.org.
fixed distortion computation to work with new format

code now produces a translation on my test case, though it's not the correct one


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5719c8cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5719c8cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5719c8cf

Branch: refs/heads/JOSHUA-284
Commit: 5719c8cff728499bffd1053462351340f1d91353
Parents: 9b73d61
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:17:21 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:17:21 2016 -0500

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/ff/phrase/Distortion.java | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5719c8cf/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index f9e6a29..072162b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -56,12 +56,14 @@ public class Distortion extends StatelessFF {
   public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
       Sentence sentence, Accumulator acc) {
 
-    if (rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE) {
-        int start_point = j - rule.getFrench().length + rule.getArity();
+    if (rule == Hypothesis.MONO_RULE || rule == Hypothesis.SWAP_RULE) {
+//        int start_point = j - rule.getFrench().length + rule.getArity();
+//        int jump_size = Math.abs(tailNodes.get(0).j - start_point);
 
-        int jump_size = Math.abs(tailNodes.get(0).j - start_point);
-//        acc.add(name, -jump_size);
-        acc.add(denseFeatureIndex, -jump_size); 
+      int start_point = tailNodes.get(rule == Hypothesis.MONO_RULE ? 0 : 1).j;
+      int jump_size = Math.abs(j - start_point);
+
+      acc.add(denseFeatureIndex, -jump_size); 
     }
     
 //    System.err.println(String.format("DISTORTION(%d, %d) from %d = %d", i, j, tailNodes != null ? tailNodes.get(0).j : -1, jump_size));


[13/15] incubator-joshua git commit: off-by-one error in computing future estimates

Posted by mj...@apache.org.
off-by-one error in computing future estimates


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/473b3016
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/473b3016
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/473b3016

Branch: refs/heads/JOSHUA-284
Commit: 473b3016562677671f70a19cd15d67a2bc1a5c83
Parents: eb00223
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:44:14 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:44:14 2016 -0500

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/decoder/phrase/Future.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/473b3016/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index c411ccb..b1bdcc8 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -68,7 +68,7 @@ public class Future {
 
     // All the phrases are in, now do minimum dynamic programming.  Lengths 0 and 1 were already handled above.
     for (int length = 2; length <= chart.SentenceLength(); length++) {
-      for (int begin = 1; begin < chart.SentenceLength() - length; begin++) {
+      for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
         for (int division = begin + 1; division < begin + length; division++) {
           setEntry(begin, begin + length, Math.max(getEntry(begin, begin + length), getEntry(begin, division) + getEntry(division, begin + length)));
         }


[15/15] incubator-joshua git commit: fixed computation of distortion

Posted by mj...@apache.org.
fixed computation of distortion


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/16d5647b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/16d5647b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/16d5647b

Branch: refs/heads/JOSHUA-284
Commit: 16d5647bee30345ffa56b5b7d5bebc1021afa3fa
Parents: 574cb36
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:45:12 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:45:12 2016 -0500

----------------------------------------------------------------------
 .../joshua/decoder/ff/phrase/Distortion.java    | 23 +++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/16d5647b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index 072162b..d4b49db 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -60,14 +60,27 @@ public class Distortion extends StatelessFF {
 //        int start_point = j - rule.getFrench().length + rule.getArity();
 //        int jump_size = Math.abs(tailNodes.get(0).j - start_point);
 
-      int start_point = tailNodes.get(rule == Hypothesis.MONO_RULE ? 0 : 1).j;
-      int jump_size = Math.abs(j - start_point);
+      if (rule == Hypothesis.MONO_RULE) {
+        int start_point = j - tailNodes.get(1).getHyperEdges().get(0).getRule().getFrench().length;
+        int last_point = tailNodes.get(0).j;
+        int jump_size = Math.abs(start_point - last_point);
+      
+//        System.err.println(String.format("DISTORTION_mono(%d -> %d) = %d", 
+//            last_point, start_point, jump_size));
 
-      acc.add(denseFeatureIndex, -jump_size); 
+        acc.add(denseFeatureIndex, -jump_size);
+      } else {
+        int start_point = j - tailNodes.get(0).getHyperEdges().get(0).getRule().getFrench().length;
+        int last_point = tailNodes.get(1).j;
+        int jump_size = Math.abs(start_point - last_point);
+      
+//        System.err.println(String.format("DISTORTION_swap(%d -> %d) = %d", 
+//            last_point, start_point, jump_size));
+
+        acc.add(denseFeatureIndex, -jump_size);    
+      }
     }
     
-//    System.err.println(String.format("DISTORTION(%d, %d) from %d = %d", i, j, tailNodes != null ? tailNodes.get(0).j : -1, jump_size));
-
     return null;
   }
 }


[05/15] incubator-joshua git commit: enabled test

Posted by mj...@apache.org.
enabled test


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1ec6271
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1ec6271
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1ec6271

Branch: refs/heads/JOSHUA-284
Commit: b1ec62711d15f3b692b6a7026752123f75522f6e
Parents: dac822d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:24:07 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:24:07 2016 -0400

----------------------------------------------------------------------
 .../apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1ec6271/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index f2fc6a7..9e9a415 100644
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -60,7 +60,7 @@ public class PhraseDecodingTest {
     decoder = null;
   }
 
-  @Test(enabled = false)
+  @Test(enabled = true)
   public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
     final String translation = decode(INPUT).toString();
     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
@@ -69,6 +69,7 @@ public class PhraseDecodingTest {
 
   private Translation decode(String input) {
     final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    joshuaConfig.setVerbosity(2);
     return decoder.decode(sentence);
   }
 


[04/15] incubator-joshua git commit: missed file in commit

Posted by mj...@apache.org.
missed file in commit


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dac822d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dac822d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dac822d1

Branch: refs/heads/JOSHUA-284
Commit: dac822d15145614c33f5fb12d2797e1f91825bb3
Parents: 48a9aad
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:23:57 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:23:57 2016 -0400

----------------------------------------------------------------------
 .../apache/joshua/decoder/phrase/Candidate.java | 55 ++++++++++++++------
 1 file changed, 40 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dac822d1/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 2abe560..303749d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -67,14 +67,17 @@ public class Candidate {
   // the reordering rule used by an instantiated Candidate
   private Rule rule;
   
-  // the HGNode built over the current target side phrase
-  private HGNode phraseNode;
-  
-  // the cost of the current configuration
+  /* 
+   * Stores the inside cost of the current phrase, as well as the computed dynamic programming
+   * state. Expensive to compute so there is an option of delaying it.
+   */
   private ComputeNodeResult computedResult;
   
-  // scoring and state information 
-  private ComputeNodeResult result;
+  /*
+   * This is the HGNode built over the current target side phrase. It requires the computed results
+   * as part of its constructor, so we delay computing it unless needed.
+   */
+  private HGNode phraseNode;
   
   /**
    * When candidate objects are extended, the new one is initialized with the same underlying
@@ -121,6 +124,8 @@ public class Candidate {
 
   public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
       List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
+    this.featureFunctions = featureFunctions;
+    this.sentence = sentence;
     this.hypotheses = hypotheses;
     this.phrases = phrases;
     this.span = span;
@@ -128,7 +133,11 @@ public class Candidate {
     this.ranks = ranks;
     this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
 //    this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
-    this.phraseNode = null;
+    
+    // TODO: compute this proactively or lazily according to a parameter
+    getResult();
+//    this.phraseNode = null;
+//    this.computedResult = null; 
   }
   
   /**
@@ -203,10 +212,24 @@ public class Candidate {
    * @return a new hypergraph node representing the phrase translation
    */
   public HGNode getPhraseNode() {
-    ComputeNodeResult result = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
-    phraseNode = new HGNode(-1, span.end, rule.getLHS(), result.getDPStates(), null, result.getPruningEstimate());
+    getResult();
     return phraseNode;
   }
+  
+  /**
+   * Ensures that the cost of applying the edge has been computed. This is tucked away in an
+   * accessor so that we can do it lazily if we wish.
+   * 
+   * @return
+   */
+  public ComputeNodeResult getResult() {
+    if (computedResult == null) {
+      computedResult = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
+      phraseNode = new HGNode(-1, span.end, rule.getLHS(), computedResult.getDPStates(), null, computedResult.getPruningEstimate());
+    }
+    
+    return computedResult;
+  }
     
   /**
    * This returns the rule being applied (straight or inverted)
@@ -246,10 +269,6 @@ public class Candidate {
     return cov;
   }
 
-  public ComputeNodeResult getResult() {
-    return computedResult;
-  }
-
   /**
    * This returns the sum of two costs: the HypoState cost + the transition cost. The HypoState cost
    * is in turn the sum of two costs: the Viterbi cost of the underlying hypothesis, and the adjustment
@@ -263,7 +282,13 @@ public class Candidate {
    * @return the sum of two costs: the HypoState cost + the transition cost
    */
   public float score() {
-    return getHypothesis().getScore() + future_delta + result.getTransitionCost();
+    float score = getHypothesis().getScore() + future_delta;
+    /* 
+     * TODO: you can add this if it's been computed.
+     */
+    if (computedResult != null)
+      score += computedResult.getTransitionCost();
+    return score;
   }
   
   public float getFutureEstimate() {
@@ -271,6 +296,6 @@ public class Candidate {
   }
   
   public List<DPState> getStates() {
-    return result.getDPStates();
+    return getResult().getDPStates();
   }
 }


[02/15] incubator-joshua git commit: updated scripts to work with the new format

Posted by mj...@apache.org.
updated scripts to work with the new format


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32504c47
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32504c47
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32504c47

Branch: refs/heads/JOSHUA-284
Commit: 32504c47bbc90b3fd4a8d02298b9758fa8126a44
Parents: dcc7e7e
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:13:50 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:13:50 2016 -0400

----------------------------------------------------------------------
 scripts/support/phrase2hiero.py | 22 ++++------------------
 scripts/training/pipeline.pl    |  8 ++------
 2 files changed, 6 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32504c47/scripts/support/phrase2hiero.py
----------------------------------------------------------------------
diff --git a/scripts/support/phrase2hiero.py b/scripts/support/phrase2hiero.py
index e741564..581a823 100755
--- a/scripts/support/phrase2hiero.py
+++ b/scripts/support/phrase2hiero.py
@@ -1,13 +1,10 @@
 #!/usr/bin/python
 
 """
-Prepends nonterminals to source and target side of phrase rules, and also
-increments the alignment points (if present) to match.
-This allows them to be used in the phrase-based decoder.
+Converts a Moses phrase table to a Joshua phrase table. The differences are
+(a) adding an LHS and (b) applying -log() to all the model weights.
 
-Usage: gzip -cd grammar.gz | phrase2hiero.py [-moses] | gzip -9n > grammar.new.gz
-
-If you specify "-moses", it will also apply -log() to each of the model weights.
+Usage: gzip -cd grammar.gz | phrase2hiero.py | gzip -9n > grammar.new.gz
 
 Author: Matt Post <po...@cs.jhu.edu>
 Date:   June 2016
@@ -16,7 +13,6 @@ Date:   June 2016
 import sys
 import math
 import codecs
-import argparse
 
 reload(sys)
 sys.setdefaultencoding('utf-8')
@@ -24,11 +20,6 @@ sys.stdin = codecs.getreader('utf-8')(sys.stdin)
 sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
 sys.stdout.encoding = 'utf-8'
 
-def incr(alignment):
-    """Takes an alignment point (0-1) and increments both sides"""
-    points = alignment.split('-')
-    return '%d-%d' % (int(points[0]) + 1, int(points[1]) + 1)
-
 def maybelog(value):
     """Takes a feature value and returns -log(x) if it is a scalar"""
     try:
@@ -46,14 +37,9 @@ for line in sys.stdin:
 
     # Get all the fields
     tokens = line.split(r' ||| ')
-    tokens[1] = '[X,1] ' + tokens[1]
-    tokens[2] = '[X,1] ' + tokens[2]
 
     # take the -log() of each input token
     if moses and len(tokens) >= 4:
         tokens[3] = ' '.join(map(maybelog, tokens[3].split(' ')))
 
-    if len(tokens) >= 5:
-        tokens[4] = ' '.join(map(incr, tokens[4].split(' ')))
-
-    print ' ||| '.join(tokens)
+    print ' ||| '.join(tokens),

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32504c47/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index ea617bc..08933ec 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -1123,7 +1123,7 @@ if (! defined $GRAMMAR_FILE) {
 
     # Convert the model to Joshua format
     $cachepipe->cmd("convert-moses-to-joshua",
-                    "$CAT model/phrase-table.gz | $SCRIPTDIR/support/phrase2hiero.py | gzip -9n > grammar.gz",
+                    "$CAT model/phrase-table.gz | $SCRIPTDIR/support/phrase2hiero.py -moses | gzip -9n > grammar.gz",
                     "model/phrase-table.gz",
                     "grammar.gz",
         );
@@ -1165,10 +1165,6 @@ if (! defined $GRAMMAR_FILE) {
 
     $GRAMMAR_FILE = "grammar.gz";
 
-    # Convert phrase model to hiero format (Thrax should do this!)
-    if ($GRAMMAR_TYPE eq "phrase") {
-        system("mv grammar.gz grammar.tmp.gz; gzip -cd grammar.tmp.gz | $SCRIPTDIR/support/phrase2hiero.py | gzip -9n > grammar.gz; rm -rf grammar.tmp.gz");
-     }
   } else {
 
     print STDERR "* FATAL: There was no way to build a grammar, and none was passed in\n";
@@ -1181,7 +1177,7 @@ if (! defined $GRAMMAR_FILE) {
 }
 
 # Pack the entire model! Saves filtering and repacking of tuning and test sets
-if ($DO_PACK_GRAMMARS and ! $DO_FILTER_TM) {
+if ($DO_PACK_GRAMMARS and ! $DO_FILTER_TM and ! -e "grammar.packed") {
   $cachepipe->cmd("pack-grammar",
                   "$SCRIPTDIR/support/grammar-packer.pl -a -T $TMPDIR -m $PACKER_MEM -g $GRAMMAR_FILE -o $RUNDIR/grammar.packed",
                   "$RUNDIR/grammar.packed/vocabulary",


[10/15] incubator-joshua git commit: build two nodes over terminal productions

Posted by mj...@apache.org.
build two nodes over terminal productions


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9b73d614
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9b73d614
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9b73d614

Branch: refs/heads/JOSHUA-284
Commit: 9b73d6147a61580058cc57c86c1f623f44b7452a
Parents: af4ef88
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:16:47 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:16:47 2016 -0500

----------------------------------------------------------------------
 .../apache/joshua/decoder/phrase/Candidate.java | 37 ++++++++++++++------
 .../joshua/decoder/phrase/Hypothesis.java       |  4 +--
 .../org/apache/joshua/decoder/phrase/Stack.java |  8 ++---
 3 files changed, 33 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index 303749d..bd581e3 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -42,6 +42,7 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
 import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 import org.apache.joshua.decoder.ff.tm.Rule;
 import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.hypergraph.HyperEdge;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
 public class Candidate {
@@ -119,7 +120,7 @@ public class Candidate {
   public String toString() {
     return String.format("CANDIDATE(hyp %d/%d, phr %d/%d) [%s] phrase=[%s] span=%s",
         ranks[0], hypotheses.size(), ranks[1], phrases.size(),
-        getHypothesis(), getRule().getEnglishWords().replaceAll("\\[.*?\\] ",""), getSpan());
+        getHypothesis(), getPhraseNode(), getSpan());
   }
 
   public Candidate(List<FeatureFunction> featureFunctions, Sentence sentence, 
@@ -131,13 +132,13 @@ public class Candidate {
     this.span = span;
     this.future_delta = delta;
     this.ranks = ranks;
-    this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.END_RULE;
+    this.rule = isMonotonic() ? Hypothesis.MONO_RULE : Hypothesis.SWAP_RULE;
 //    this.score = hypotheses.get(ranks[0]).score + phrases.get(ranks[1]).getEstimatedCost();
-    
+
     // TODO: compute this proactively or lazily according to a parameter
-    getResult();
+    computeResult();
 //    this.phraseNode = null;
-//    this.computedResult = null; 
+//    this.computedResult = null;
   }
   
   /**
@@ -206,13 +207,23 @@ public class Candidate {
   }
   
   /**
+   * A candidate is a (hypothesis, target phrase) pairing. The hypothesis and target phrase are
+   * drawn from a list that is indexed by (ranks[0], ranks[1]), respectively. This is a shortcut
+   * to return the rule representing the terminal phrase production of the candidate pair.
+   * 
+   * @return the phrase rule at position ranks[1]
+   */
+  public Rule getPhraseRule() {
+    return this.phrases.get(ranks[1]);
+  }
+  
+  /**
    * This returns a new Hypothesis (HGNode) representing the phrase being added, i.e., a terminal
    * production in the hypergraph. The score and DP state are computed only here on demand.
    * 
    * @return a new hypergraph node representing the phrase translation
    */
   public HGNode getPhraseNode() {
-    getResult();
     return phraseNode;
   }
   
@@ -222,10 +233,16 @@ public class Candidate {
    * 
    * @return
    */
-  public ComputeNodeResult getResult() {
+  public ComputeNodeResult computeResult() {
     if (computedResult == null) {
-      computedResult = new ComputeNodeResult(featureFunctions, getRule(), null, span.start, span.end, null, sentence);
-      phraseNode = new HGNode(-1, span.end, rule.getLHS(), computedResult.getDPStates(), null, computedResult.getPruningEstimate());
+      // add the phrase node
+      ComputeNodeResult phraseResult = new ComputeNodeResult(featureFunctions, getPhraseRule(), null, span.start, span.end, null, sentence);
+      HyperEdge edge = new HyperEdge(getPhraseRule(), phraseResult.getViterbiCost(), phraseResult.getTransitionCost(), null, null);
+      phraseNode = new HGNode(-1, span.end, rule.getLHS(), phraseResult.getDPStates(), edge, phraseResult.getPruningEstimate());
+
+      // add the rule
+      // TODO: sourcepath
+      computedResult = new ComputeNodeResult(featureFunctions, getRule(), getTailNodes(), -1, span.end, null, sentence);
     }
     
     return computedResult;
@@ -296,6 +313,6 @@ public class Candidate {
   }
   
   public List<DPState> getStates() {
-    return getResult().getDPStates();
+    return computeResult().getDPStates();
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 904634d..132d62d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -71,8 +71,8 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
   public Hypothesis(Candidate cand) {
     // TODO: sourcepath
     super(-1, cand.span.end, cand.getRule().getLHS(), cand.getStates(), 
-        new HyperEdge(cand.getRule(), cand.getResult().getViterbiCost(), 
-            cand.getResult().getTransitionCost(),
+        new HyperEdge(cand.getRule(), cand.computeResult().getViterbiCost(), 
+            cand.computeResult().getTransitionCost(),
             cand.getTailNodes(), null), cand.score());
     this.coverage = cand.getCoverage();
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9b73d614/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index ad24a51..3b8a976 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -215,12 +215,12 @@ public class Stack extends ArrayList<Hypothesis> {
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("{} from ( ... {} )", taskName, complete.getHypothesis().getRule().getEnglishWords());
-      LOG.debug("        base score {}", complete.getResult().getBaseCost());
+      LOG.debug("        base score {}", complete.computeResult().getBaseCost());
       LOG.debug("        covering {}-{}", complete.getSpan().start - 1, complete.getSpan().end - 2);
-      LOG.debug("        translated as: {}", complete.getRule().getEnglishWords());
+      LOG.debug("        translated as: {}", complete.getPhraseRule().getEnglishWords());
       LOG.debug("        score {} + future cost {} = {}",
-          complete.getResult().getTransitionCost(), complete.getFutureEstimate(),
-          complete.getResult().getTransitionCost() + complete.getFutureEstimate());
+          complete.computeResult().getTransitionCost(), complete.getFutureEstimate(),
+          complete.computeResult().getTransitionCost() + complete.getFutureEstimate());
     }
   }
 }


[14/15] incubator-joshua git commit: bugfix: this is (probably) supposed to return the pruning estimate

Posted by mj...@apache.org.
bugfix: Candidate.score() is (probably) supposed to return the pruning estimate


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/574cb36b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/574cb36b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/574cb36b

Branch: refs/heads/JOSHUA-284
Commit: 574cb36b5e1b610e37eda81d6d76b4318c141a4c
Parents: 473b301
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 19:44:44 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 19:44:44 2016 -0500

----------------------------------------------------------------------
 .../org/apache/joshua/decoder/phrase/Candidate.java     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/574cb36b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index cb9cd6d..9c7b3d1 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -296,12 +296,12 @@ public class Candidate {
    * @return the sum of two costs: the HypoState cost + the transition cost
    */
   public float score() {
-    float score = getHypothesis().getScore() + future_delta;
-    /* 
-     * TODO: you can add this if it's been computed.
-     */
-    if (computedResult != null)
-      score += computedResult.getTransitionCost();
+    float score = computedResult.getPruningEstimate();
+
+//    float score = getHypothesis().getScore() + future_delta;
+//    if (computedResult != null)
+//      score += computedResult.getTransitionCost();
+    
     return score;
   }
   


[09/15] incubator-joshua git commit: added derived directories

Posted by mj...@apache.org.
added derived directories


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/af4ef88d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/af4ef88d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/af4ef88d

Branch: refs/heads/JOSHUA-284
Commit: af4ef88d5a6a6a1cc4167ec421b4b6bd1a91dc0a
Parents: 048b2e3
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:15:36 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:15:36 2016 -0500

----------------------------------------------------------------------
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/af4ef88d/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 0d42974..8e03044 100644
--- a/.gitignore
+++ b/.gitignore
@@ -58,3 +58,5 @@ build
 /target/
 .project
 /doc/
+/ext.bak/
+/ext/


[03/15] incubator-joshua git commit: repacked the grammar

Posted by mj...@apache.org.
repacked the grammar


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/48a9aad7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/48a9aad7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/48a9aad7

Branch: refs/heads/JOSHUA-284
Commit: 48a9aad7873b969230aad90d6e0c61e13ae2d2b4
Parents: 32504c4
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 18:14:15 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 18:14:15 2016 -0400

----------------------------------------------------------------------
 .../decoder/phrase/decode/rules.packed/config   |   4 ++--
 .../decode/rules.packed/slice_00000.features    | Bin 4128858 -> 4128858 bytes
 .../decode/rules.packed/slice_00000.source      | Bin 1982244 -> 1982228 bytes
 .../decode/rules.packed/slice_00000.target      | Bin 2652936 -> 1463856 bytes
 .../rules.packed/slice_00000.target.lookup      | Bin 32 -> 28 bytes
 .../phrase/decode/rules.packed/vocabulary       | Bin 169236 -> 169225 bytes
 6 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/config b/src/test/resources/decoder/phrase/decode/rules.packed/config
index 7bdb804..2251fe6 100644
--- a/src/test/resources/decoder/phrase/decode/rules.packed/config
+++ b/src/test/resources/decoder/phrase/decode/rules.packed/config
@@ -1,2 +1,2 @@
-version = 3
-max-source-len = 4
+version = 4
+max-source-len = 3

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
index c4127ff..27fa07d 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
index 83d47dc..cdc98f6 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
index 8094eef..fa82c0d 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
index 1c6db18..3e8c294 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/48a9aad7/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
index e9b0900..ff62042 100644
Binary files a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary and b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary differ


[06/15] incubator-joshua git commit: temporary commenting-out of very verbose output

Posted by mj...@apache.org.
temporary commenting-out of very verbose output


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1022699c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1022699c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1022699c

Branch: refs/heads/JOSHUA-284
Commit: 1022699cc744fa9fbc21f4b19122f51e3985a371
Parents: b1ec627
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:24:46 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:24:46 2016 -0400

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1022699c/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 15fbec1..c11d46a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -380,10 +380,10 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
     if (this.estimatedCost <= Float.NEGATIVE_INFINITY) {
       this.estimatedCost = 0.0f; // weights.innerProduct(computeFeatures());
 
-      LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
+//      LOG.debug("estimateCost({} ;; {})", getFrenchWords(), getEnglishWords());
       for (FeatureFunction ff : models) {
         float val = ff.estimateCost(this, null);
-        LOG.debug("  FEATURE {} -> {}", ff.getName(), val);
+//        LOG.debug("  FEATURE {} -> {}", ff.getName(), val);
         this.estimatedCost += val; 
       }
     }


[08/15] incubator-joshua git commit: removed RHS nonterminal

Posted by mj...@apache.org.
removed RHS nonterminal


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/048b2e30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/048b2e30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/048b2e30

Branch: refs/heads/JOSHUA-284
Commit: 048b2e30f849de3f1ac82e6017ea2aab299f6b8d
Parents: 2e746c1
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Aug 19 13:15:18 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Aug 19 13:15:18 2016 -0500

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/048b2e30/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index f87b728..904634d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -43,9 +43,9 @@ public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
   private Coverage coverage;
 
   public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> |||   ||| 0-0");
-  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> |||   ||| 0-0 1-1");
-  public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
-  public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
+  public static Rule END_RULE   = new HieroFormatReader().parseLine("[GOAL] ||| </s> ||| </s> |||   ||| 0-0");
+  public static Rule MONO_RULE  = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
+  public static Rule SWAP_RULE  = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
   
   public String toString() {
     StringBuffer sb = new StringBuffer();