You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/22 21:47:50 UTC

[24/27] incubator-joshua git commit: Merge branch 'master' into JOSHUA-284

Merge branch 'master' into JOSHUA-284


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d28b4f39
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d28b4f39
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d28b4f39

Branch: refs/heads/master
Commit: d28b4f39c578197803beba2c376db5ed95774576
Parents: 25d28fe 2b570d2
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sun Aug 21 12:36:37 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sun Aug 21 12:36:37 2016 -0500

----------------------------------------------------------------------
 demo/README.md                                  |   2 +-
 demo/apache_joshua_logo.png                     | Bin 0 -> 306617 bytes
 demo/apache_joshua_logo_faded.png               | Bin 0 -> 309216 bytes
 demo/demo.config                                |   3 +
 demo/demo.js                                    |  19 +-
 demo/index.html                                 |  37 +-
 pom.xml                                         |  30 +-
 .../org/apache/joshua/adagrad/AdaGradCore.java  | 101 +++---
 .../org/apache/joshua/adagrad/Optimizer.java    | 348 +++++++++----------
 .../org/apache/joshua/corpus/BasicPhrase.java   |   2 +-
 .../apache/joshua/corpus/ContiguousPhrase.java  |   8 +-
 .../java/org/apache/joshua/corpus/Phrase.java   |   2 +-
 .../java/org/apache/joshua/corpus/Span.java     |   6 +-
 .../org/apache/joshua/corpus/SymbolTable.java   |   2 +-
 .../org/apache/joshua/corpus/Vocabulary.java    |  10 +-
 .../joshua/corpus/syntax/ArraySyntaxTree.java   |  51 +--
 .../apache/joshua/corpus/syntax/SyntaxTree.java |  10 +-
 .../org/apache/joshua/decoder/ArgsParser.java   |   8 +-
 .../java/org/apache/joshua/decoder/BLEU.java    |  72 ++--
 .../java/org/apache/joshua/decoder/Decoder.java |  26 +-
 .../apache/joshua/decoder/DecoderThread.java    |   2 +-
 .../joshua/decoder/JoshuaConfiguration.java     |  45 +--
 .../joshua/decoder/NbestMinRiskReranker.java    |  33 +-
 .../joshua/decoder/StructuredTranslation.java   |   9 +-
 .../decoder/StructuredTranslationFactory.java   |   5 +-
 .../org/apache/joshua/decoder/Translation.java  |   8 +-
 .../org/apache/joshua/decoder/Translations.java |   2 +-
 .../joshua/decoder/chart_parser/Cell.java       |  12 +-
 .../joshua/decoder/chart_parser/Chart.java      |  51 ++-
 .../decoder/chart_parser/ComputeNodeResult.java |   8 +-
 .../decoder/chart_parser/CubePruneState.java    |  20 +-
 .../joshua/decoder/chart_parser/DotChart.java   |  24 +-
 .../joshua/decoder/chart_parser/SourcePath.java |   4 +-
 .../decoder/chart_parser/StateConstraint.java   |   5 +-
 .../joshua/decoder/chart_parser/SuperNode.java  |   2 +-
 .../joshua/decoder/ff/FeatureFunction.java      |  24 +-
 .../apache/joshua/decoder/ff/FeatureVector.java |  21 +-
 .../joshua/decoder/ff/LabelCombinationFF.java   |   2 +-
 .../joshua/decoder/ff/LabelSubstitutionFF.java  |   8 +-
 .../joshua/decoder/ff/LexicalFeatures.java      |   2 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |   7 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |   2 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |   4 +-
 .../org/apache/joshua/decoder/ff/RuleFF.java    |   6 +-
 .../decoder/ff/RulePropertiesQuerying.java      |   6 +-
 .../org/apache/joshua/decoder/ff/RuleShape.java |   2 +-
 .../joshua/decoder/ff/SourceDependentFF.java    |   4 +-
 .../apache/joshua/decoder/ff/SourcePathFF.java  |   2 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |  13 +-
 .../ff/fragmentlm/ConcatenationIterator.java    |  10 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |  59 ++--
 .../ff/fragmentlm/PennTreebankReader.java       |  17 +-
 .../joshua/decoder/ff/fragmentlm/Tree.java      |  56 ++-
 .../joshua/decoder/ff/fragmentlm/Trees.java     |   8 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  |  26 +-
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |  39 +--
 .../ff/lm/berkeley_lm/LMGrammarBerkeley.java    |   4 +-
 .../ff/lm/bloomfilter_lm/BloomFilter.java       |   2 +-
 .../BloomFilterLanguageModel.java               |  18 +-
 .../joshua/decoder/ff/lm/buildin_lm/TrieLM.java |  25 +-
 .../joshua/decoder/ff/phrase/Distortion.java    |   2 +-
 .../ff/similarity/EdgePhraseSimilarityFF.java   |  17 +-
 .../ff/state_maintenance/NgramDPState.java      |   6 +-
 .../joshua/decoder/ff/tm/AbstractGrammar.java   |  12 +-
 .../decoder/ff/tm/BasicRuleCollection.java      |   2 +-
 .../joshua/decoder/ff/tm/CreateGlueGrammar.java |   2 +-
 .../joshua/decoder/ff/tm/GrammarReader.java     |   2 +-
 .../apache/joshua/decoder/ff/tm/OwnerMap.java   |   2 +-
 .../org/apache/joshua/decoder/ff/tm/Rule.java   |  67 ++--
 .../decoder/ff/tm/SentenceFilteredGrammar.java  |  12 +-
 .../decoder/ff/tm/format/MosesFormatReader.java |   2 +-
 .../ff/tm/hash_based/ExtensionIterator.java     |   2 +-
 .../tm/hash_based/MemoryBasedBatchGrammar.java  |   8 +-
 .../decoder/ff/tm/packed/PackedGrammar.java     |  87 ++---
 .../ff/tm/packed/SliceAggregatingTrie.java      |   4 +-
 .../decoder/hypergraph/AlignedSourceTokens.java |   2 +-
 .../decoder/hypergraph/AllSpansWalker.java      |  19 +-
 .../hypergraph/DefaultInsideOutside.java        |  34 +-
 .../joshua/decoder/hypergraph/ForestWalker.java |  10 +-
 .../GrammarBuilderWalkerFunction.java           |  14 +-
 .../joshua/decoder/hypergraph/HGNode.java       |  54 ++-
 .../joshua/decoder/hypergraph/HyperEdge.java    |   6 +-
 .../joshua/decoder/hypergraph/HyperGraph.java   |  30 +-
 .../decoder/hypergraph/HyperGraphPruning.java   |   9 +-
 .../decoder/hypergraph/KBestExtractor.java      |  51 ++-
 .../hypergraph/OutputStringExtractor.java       |   8 +-
 .../hypergraph/StringToTreeConverter.java       |  16 +-
 .../decoder/hypergraph/ViterbiExtractor.java    |  10 +-
 .../hypergraph/WordAlignmentExtractor.java      |   2 +-
 .../decoder/hypergraph/WordAlignmentState.java  |   8 +-
 .../apache/joshua/decoder/io/JSONMessage.java   |  18 +-
 .../decoder/io/TranslationRequestStream.java    |   6 +-
 .../apache/joshua/decoder/phrase/Candidate.java |   4 +-
 .../apache/joshua/decoder/phrase/Coverage.java  |   2 +-
 .../apache/joshua/decoder/phrase/Future.java    |   4 +-
 .../apache/joshua/decoder/phrase/Header.java    |  87 +++++
 .../joshua/decoder/phrase/Hypothesis.java       |   5 +-
 .../joshua/decoder/phrase/PhraseChart.java      |  20 +-
 .../joshua/decoder/phrase/PhraseTable.java      |   4 +-
 .../org/apache/joshua/decoder/phrase/Stack.java |  12 +-
 .../apache/joshua/decoder/phrase/Stacks.java    |  23 +-
 .../decoder/segment_file/ConstraintRule.java    |   4 +-
 .../joshua/decoder/segment_file/Sentence.java   |  18 +-
 .../joshua/decoder/segment_file/Token.java      |   9 +-
 .../java/org/apache/joshua/pro/PROCore.java     |  22 +-
 .../org/apache/joshua/server/ServerThread.java  |   9 +-
 .../phrase/decode/PhraseDecodingTest.java       |  10 +
 .../apache/joshua/system/LmOovFeatureTest.java  |  11 +-
 108 files changed, 1072 insertions(+), 1030 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index d92665d,280ea5a..ddbd222
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@@ -46,16 -46,16 +46,16 @@@ public class ComputeNodeResult 
    private static final Logger LOG = LoggerFactory.getLogger(ComputeNodeResult.class);
  
    // The cost incurred by the rule itself (and all associated feature functions)
 -  private final float transitionCost;
 +  private float transitionCost;
  
    // transitionCost + the Viterbi costs of the tail nodes.
 -  private final float viterbiCost;
 -
 -  // viterbiCost + a future estimate (outside cost estimate).
 -  private final float pruningCostEstimate;
 +  private float viterbiCost;
  
 +  // The future or outside cost (estimated)
 +  private float futureCostEstimate;
 +  
    // The StateComputer objects themselves serve as keys.
-   private List<DPState> dpStates;
+   private final List<DPState> dpStates;
  
    /**
     * Computes the new state(s) that are produced when applying the given rule to the list of tail
@@@ -99,13 -99,13 +99,13 @@@
        }
      }
  
-     List<DPState> allDPStates = new ArrayList<DPState>();
+     List<DPState> allDPStates = new ArrayList<>();
  
      // The transition cost is the new cost incurred by applying this rule
 -    float transitionCost = 0.0f;
 +    this.transitionCost = 0.0f;
  
      // The future cost estimate is a heuristic estimate of the outside cost of this edge.
 -    float futureCostEstimate = 0.0f;
 +    this.futureCostEstimate = 0.0f;
  
      /*
       * We now iterate over all the feature functions, computing their cost and their expected future
@@@ -115,7 -115,7 +115,7 @@@
        FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator(); 
  
        DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
--      transitionCost += acc.getScore();
++      this.transitionCost += acc.getScore();
  
  
        if (LOG.isDebugEnabled()) {
@@@ -129,10 -129,13 +129,10 @@@
          allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
        }
      }
--    viterbiCost += transitionCost;
++    this.viterbiCost += transitionCost;
      if (LOG.isDebugEnabled())
        LOG.debug("-> COST = {}", transitionCost);
 -    // Set the final results.
 -    this.pruningCostEstimate = viterbiCost + futureCostEstimate;
 -    this.viterbiCost = viterbiCost;
 -    this.transitionCost = transitionCost;
 +
      this.dpStates = allDPStates;
    }
  

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index a446eab,93e21cd..2a5dc03
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@@ -42,41 -33,26 +42,41 @@@ import org.apache.joshua.decoder.ff.Fea
  import org.apache.joshua.decoder.ff.state_maintenance.DPState;
  import org.apache.joshua.decoder.ff.tm.Rule;
  import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.hypergraph.HyperEdge;
 +import org.apache.joshua.decoder.segment_file.Sentence;
  
 -public class Candidate {
 -
 +public class Candidate implements Comparable<Candidate> {
 +  
 +  private List<FeatureFunction> featureFunctions;
 +  private Sentence sentence;
 +  
    // the set of hypotheses that can be paired with phrases from this span 
-   private List<Hypothesis> hypotheses;
+   private final List<Hypothesis> hypotheses;
  
    // the list of target phrases gathered from a span of the input
 -  private final TargetPhrases phrases;
 -
 -  // source span of new phrase
 -  public final Span span;
 +  private TargetPhrases phrases;
    
    // future cost of applying phrases to hypotheses
 -  final float future_delta;
 +  private float future_delta;
    
    // indices into the hypotheses and phrases arrays (used for cube pruning)
-   private int[] ranks;
+   private final int[] ranks;
    
 -  // scoring and state information 
 -  private ComputeNodeResult result;
 +  // the reordering rule used by an instantiated Candidate
 +  private Rule rule;
 +  
 +  /* 
 +   * Stores the inside cost of the current phrase, as well as the computed dynamic programming
 +   * state. Expensive to compute so there is an option of delaying it.
 +   */
 +  private ComputeNodeResult computedResult;
 +  
 +  /*
 +   * This is the HGNode built over the current target side phrase. It requires the computed results
 +   * as part of its constructor, so we delay computing it unless needed.
 +   */
 +  private HGNode phraseNode;
 +  private ComputeNodeResult phraseResult;
    
    /**
     * When candidate objects are extended, the new one is initialized with the same underlying

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 8ef5597,af5069d..2710a48
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@@ -40,17 -39,14 +40,16 @@@ import org.apache.joshua.decoder.hyperg
  public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
  
    // The hypothesis' coverage vector
-   private Coverage coverage;
+   private final Coverage coverage;
  
 -  public static final Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> |||   ||| 0-0");
 -  public static final Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> |||   ||| 0-0 1-1");
 -
 +  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> |||   ||| 0-0");
 +  public static Rule END_RULE   = new HieroFormatReader().parseLine("[GOAL] ||| </s> ||| </s> |||   ||| 0-0");
 +  public static Rule MONO_RULE  = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] |||   ||| 0-0 1-1");
 +  public static Rule SWAP_RULE  = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] |||   ||| 0-1 1-0");
 +  
    public String toString() {
      StringBuffer sb = new StringBuffer();
-     for (DPState state: getDPStates())
-       sb.append(state);
+     getDPStates().forEach(sb::append);
      String words = bestHyperedge.getRule().getEnglishWords();
  //  return String.format("HYP[%s] %.5f j=%d words=%s state=%s", coverage, score, j, words, sb);
      return String.format("HYP[%s] j=%d words=[%s] state=%s", coverage, j, words, sb);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index 67f62b6,6661dfb..47a3396
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@@ -40,19 -43,20 +40,19 @@@ public class Stack extends ArrayList<Hy
  
    private static final long serialVersionUID = 7885252799032416068L;
  
-   private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
+   private final HashMap<Coverage, ArrayList<Hypothesis>> coverages;
    
 -  private final Sentence sentence;
 -  private final List<FeatureFunction> featureFunctions;
 -  private final JoshuaConfiguration config;
 +  private Sentence sentence;
 +  private JoshuaConfiguration config;
  
    /* The list of states we've already visited. */
-   private HashSet<Candidate> visitedStates;
+   private final HashSet<Candidate> visitedStates;
    
    /* A list of candidates sorted for consideration for entry to the chart (for cube pruning) */
-   private PriorityQueue<Candidate> candidates;
+   private final PriorityQueue<Candidate> candidates;
    
    /* Short-circuits adding a cube-prune state more than once */
-   private HashMap<Hypothesis, Hypothesis> deduper;
+   private final HashMap<Hypothesis, Hypothesis> deduper;
    
    /**
     * Create a new stack. Stacks are organized one for each number of source words that are covered.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index d3db223,f2fc6a7..5e878cb
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@@ -59,19 -60,12 +59,29 @@@ public class PhraseDecodingTest 
      decoder = null;
    }
  
 -  @Test(enabled = false)
 +  @Test(enabled = true)
    public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
 -    final String translation = decode(INPUT).toString();
 -    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
 -    assertEquals(gold, translation);
 +    final String translation = decode(INPUT).toString().trim();
 +    final String gold = OUTPUT;
 +    assertEquals(translation, gold);
 +  }
 +  
 +  @Test(enabled = false)
 +  public void givenInput_whenPhraseDecodingWithAlignments_thenOutputHasAlignments() throws IOException {
 +    final String translation = decode(INPUT).toString().trim();
 +    final String gold = OUTPUT_WITH_ALIGNMENTS;
 +    assertEquals(translation, gold);
 +  }
++  
++  @Test(enabled = true)
++  public void givenInput_whenPhraseDecoding_thenInputCanBeRetrieved() throws IOException {
++    String outputFormat = joshuaConfig.outputFormat;
++    joshuaConfig.outputFormat = "%e";
++    final String translation = decode(INPUT).toString().trim();
++    joshuaConfig.outputFormat = outputFormat;
++    final String gold = INPUT;
++    assertEquals(translation, gold);
+   }
  
    private Translation decode(String input) {
      final Sentence sentence = new Sentence(input, 0, joshuaConfig);