You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/22 21:47:50 UTC
[24/27] incubator-joshua git commit: Merge branch 'master' into JOSHUA-284
Merge branch 'master' into JOSHUA-284
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d28b4f39
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d28b4f39
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d28b4f39
Branch: refs/heads/master
Commit: d28b4f39c578197803beba2c376db5ed95774576
Parents: 25d28fe 2b570d2
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sun Aug 21 12:36:37 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sun Aug 21 12:36:37 2016 -0500
----------------------------------------------------------------------
demo/README.md | 2 +-
demo/apache_joshua_logo.png | Bin 0 -> 306617 bytes
demo/apache_joshua_logo_faded.png | Bin 0 -> 309216 bytes
demo/demo.config | 3 +
demo/demo.js | 19 +-
demo/index.html | 37 +-
pom.xml | 30 +-
.../org/apache/joshua/adagrad/AdaGradCore.java | 101 +++---
.../org/apache/joshua/adagrad/Optimizer.java | 348 +++++++++----------
.../org/apache/joshua/corpus/BasicPhrase.java | 2 +-
.../apache/joshua/corpus/ContiguousPhrase.java | 8 +-
.../java/org/apache/joshua/corpus/Phrase.java | 2 +-
.../java/org/apache/joshua/corpus/Span.java | 6 +-
.../org/apache/joshua/corpus/SymbolTable.java | 2 +-
.../org/apache/joshua/corpus/Vocabulary.java | 10 +-
.../joshua/corpus/syntax/ArraySyntaxTree.java | 51 +--
.../apache/joshua/corpus/syntax/SyntaxTree.java | 10 +-
.../org/apache/joshua/decoder/ArgsParser.java | 8 +-
.../java/org/apache/joshua/decoder/BLEU.java | 72 ++--
.../java/org/apache/joshua/decoder/Decoder.java | 26 +-
.../apache/joshua/decoder/DecoderThread.java | 2 +-
.../joshua/decoder/JoshuaConfiguration.java | 45 +--
.../joshua/decoder/NbestMinRiskReranker.java | 33 +-
.../joshua/decoder/StructuredTranslation.java | 9 +-
.../decoder/StructuredTranslationFactory.java | 5 +-
.../org/apache/joshua/decoder/Translation.java | 8 +-
.../org/apache/joshua/decoder/Translations.java | 2 +-
.../joshua/decoder/chart_parser/Cell.java | 12 +-
.../joshua/decoder/chart_parser/Chart.java | 51 ++-
.../decoder/chart_parser/ComputeNodeResult.java | 8 +-
.../decoder/chart_parser/CubePruneState.java | 20 +-
.../joshua/decoder/chart_parser/DotChart.java | 24 +-
.../joshua/decoder/chart_parser/SourcePath.java | 4 +-
.../decoder/chart_parser/StateConstraint.java | 5 +-
.../joshua/decoder/chart_parser/SuperNode.java | 2 +-
.../joshua/decoder/ff/FeatureFunction.java | 24 +-
.../apache/joshua/decoder/ff/FeatureVector.java | 21 +-
.../joshua/decoder/ff/LabelCombinationFF.java | 2 +-
.../joshua/decoder/ff/LabelSubstitutionFF.java | 8 +-
.../joshua/decoder/ff/LexicalFeatures.java | 2 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 7 +-
.../apache/joshua/decoder/ff/PhraseModel.java | 2 +-
.../apache/joshua/decoder/ff/PhrasePenalty.java | 4 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 6 +-
.../decoder/ff/RulePropertiesQuerying.java | 6 +-
.../org/apache/joshua/decoder/ff/RuleShape.java | 2 +-
.../joshua/decoder/ff/SourceDependentFF.java | 4 +-
.../apache/joshua/decoder/ff/SourcePathFF.java | 2 +-
.../apache/joshua/decoder/ff/TargetBigram.java | 13 +-
.../ff/fragmentlm/ConcatenationIterator.java | 10 +-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 59 ++--
.../ff/fragmentlm/PennTreebankReader.java | 17 +-
.../joshua/decoder/ff/fragmentlm/Tree.java | 56 ++-
.../joshua/decoder/ff/fragmentlm/Trees.java | 8 +-
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 26 +-
.../joshua/decoder/ff/lm/LanguageModelFF.java | 39 +--
.../ff/lm/berkeley_lm/LMGrammarBerkeley.java | 4 +-
.../ff/lm/bloomfilter_lm/BloomFilter.java | 2 +-
.../BloomFilterLanguageModel.java | 18 +-
.../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 25 +-
.../joshua/decoder/ff/phrase/Distortion.java | 2 +-
.../ff/similarity/EdgePhraseSimilarityFF.java | 17 +-
.../ff/state_maintenance/NgramDPState.java | 6 +-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 12 +-
.../decoder/ff/tm/BasicRuleCollection.java | 2 +-
.../joshua/decoder/ff/tm/CreateGlueGrammar.java | 2 +-
.../joshua/decoder/ff/tm/GrammarReader.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 2 +-
.../org/apache/joshua/decoder/ff/tm/Rule.java | 67 ++--
.../decoder/ff/tm/SentenceFilteredGrammar.java | 12 +-
.../decoder/ff/tm/format/MosesFormatReader.java | 2 +-
.../ff/tm/hash_based/ExtensionIterator.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 8 +-
.../decoder/ff/tm/packed/PackedGrammar.java | 87 ++---
.../ff/tm/packed/SliceAggregatingTrie.java | 4 +-
.../decoder/hypergraph/AlignedSourceTokens.java | 2 +-
.../decoder/hypergraph/AllSpansWalker.java | 19 +-
.../hypergraph/DefaultInsideOutside.java | 34 +-
.../joshua/decoder/hypergraph/ForestWalker.java | 10 +-
.../GrammarBuilderWalkerFunction.java | 14 +-
.../joshua/decoder/hypergraph/HGNode.java | 54 ++-
.../joshua/decoder/hypergraph/HyperEdge.java | 6 +-
.../joshua/decoder/hypergraph/HyperGraph.java | 30 +-
.../decoder/hypergraph/HyperGraphPruning.java | 9 +-
.../decoder/hypergraph/KBestExtractor.java | 51 ++-
.../hypergraph/OutputStringExtractor.java | 8 +-
.../hypergraph/StringToTreeConverter.java | 16 +-
.../decoder/hypergraph/ViterbiExtractor.java | 10 +-
.../hypergraph/WordAlignmentExtractor.java | 2 +-
.../decoder/hypergraph/WordAlignmentState.java | 8 +-
.../apache/joshua/decoder/io/JSONMessage.java | 18 +-
.../decoder/io/TranslationRequestStream.java | 6 +-
.../apache/joshua/decoder/phrase/Candidate.java | 4 +-
.../apache/joshua/decoder/phrase/Coverage.java | 2 +-
.../apache/joshua/decoder/phrase/Future.java | 4 +-
.../apache/joshua/decoder/phrase/Header.java | 87 +++++
.../joshua/decoder/phrase/Hypothesis.java | 5 +-
.../joshua/decoder/phrase/PhraseChart.java | 20 +-
.../joshua/decoder/phrase/PhraseTable.java | 4 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 12 +-
.../apache/joshua/decoder/phrase/Stacks.java | 23 +-
.../decoder/segment_file/ConstraintRule.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 18 +-
.../joshua/decoder/segment_file/Token.java | 9 +-
.../java/org/apache/joshua/pro/PROCore.java | 22 +-
.../org/apache/joshua/server/ServerThread.java | 9 +-
.../phrase/decode/PhraseDecodingTest.java | 10 +
.../apache/joshua/system/LmOovFeatureTest.java | 11 +-
108 files changed, 1072 insertions(+), 1030 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index d92665d,280ea5a..ddbd222
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@@ -46,16 -46,16 +46,16 @@@ public class ComputeNodeResult
private static final Logger LOG = LoggerFactory.getLogger(ComputeNodeResult.class);
// The cost incurred by the rule itself (and all associated feature functions)
- private final float transitionCost;
+ private float transitionCost;
// transitionCost + the Viterbi costs of the tail nodes.
- private final float viterbiCost;
-
- // viterbiCost + a future estimate (outside cost estimate).
- private final float pruningCostEstimate;
+ private float viterbiCost;
+ // The future or outside cost (estimated)
+ private float futureCostEstimate;
+
// The StateComputer objects themselves serve as keys.
- private List<DPState> dpStates;
+ private final List<DPState> dpStates;
/**
* Computes the new state(s) that are produced when applying the given rule to the list of tail
@@@ -99,13 -99,13 +99,13 @@@
}
}
- List<DPState> allDPStates = new ArrayList<DPState>();
+ List<DPState> allDPStates = new ArrayList<>();
// The transition cost is the new cost incurred by applying this rule
- float transitionCost = 0.0f;
+ this.transitionCost = 0.0f;
// The future cost estimate is a heuristic estimate of the outside cost of this edge.
- float futureCostEstimate = 0.0f;
+ this.futureCostEstimate = 0.0f;
/*
* We now iterate over all the feature functions, computing their cost and their expected future
@@@ -115,7 -115,7 +115,7 @@@
FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator();
DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
-- transitionCost += acc.getScore();
++ this.transitionCost += acc.getScore();
if (LOG.isDebugEnabled()) {
@@@ -129,10 -129,13 +129,10 @@@
allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
}
}
-- viterbiCost += transitionCost;
++ this.viterbiCost += transitionCost;
if (LOG.isDebugEnabled())
LOG.debug("-> COST = {}", transitionCost);
- // Set the final results.
- this.pruningCostEstimate = viterbiCost + futureCostEstimate;
- this.viterbiCost = viterbiCost;
- this.transitionCost = transitionCost;
+
this.dpStates = allDPStates;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index a446eab,93e21cd..2a5dc03
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@@ -42,41 -33,26 +42,41 @@@ import org.apache.joshua.decoder.ff.Fea
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.hypergraph.HyperEdge;
+import org.apache.joshua.decoder.segment_file.Sentence;
-public class Candidate {
-
+public class Candidate implements Comparable<Candidate> {
+
+ private List<FeatureFunction> featureFunctions;
+ private Sentence sentence;
+
// the set of hypotheses that can be paired with phrases from this span
- private List<Hypothesis> hypotheses;
+ private final List<Hypothesis> hypotheses;
// the list of target phrases gathered from a span of the input
- private final TargetPhrases phrases;
-
- // source span of new phrase
- public final Span span;
+ private TargetPhrases phrases;
// future cost of applying phrases to hypotheses
- final float future_delta;
+ private float future_delta;
// indices into the hypotheses and phrases arrays (used for cube pruning)
- private int[] ranks;
+ private final int[] ranks;
- // scoring and state information
- private ComputeNodeResult result;
+ // the reordering rule used by an instantiated Candidate
+ private Rule rule;
+
+ /*
+ * Stores the inside cost of the current phrase, as well as the computed dynamic programming
+ * state. Expensive to compute so there is an option of delaying it.
+ */
+ private ComputeNodeResult computedResult;
+
+ /*
+ * This is the HGNode built over the current target side phrase. It requires the computed results
+ * as part of its constructor, so we delay computing it unless needed.
+ */
+ private HGNode phraseNode;
+ private ComputeNodeResult phraseResult;
/**
* When candidate objects are extended, the new one is initialized with the same underlying
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 8ef5597,af5069d..2710a48
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@@ -40,17 -39,14 +40,16 @@@ import org.apache.joshua.decoder.hyperg
public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
// The hypothesis' coverage vector
- private Coverage coverage;
+ private final Coverage coverage;
- public static final Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
- public static final Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
-
+ public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> ||| ||| 0-0");
+ public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| </s> ||| </s> ||| ||| 0-0");
+ public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| ||| 0-0 1-1");
+ public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] ||| ||| 0-1 1-0");
+
public String toString() {
StringBuffer sb = new StringBuffer();
- for (DPState state: getDPStates())
- sb.append(state);
+ getDPStates().forEach(sb::append);
String words = bestHyperedge.getRule().getEnglishWords();
// return String.format("HYP[%s] %.5f j=%d words=%s state=%s", coverage, score, j, words, sb);
return String.format("HYP[%s] j=%d words=[%s] state=%s", coverage, j, words, sb);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index 67f62b6,6661dfb..47a3396
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@@ -40,19 -43,20 +40,19 @@@ public class Stack extends ArrayList<Hy
private static final long serialVersionUID = 7885252799032416068L;
- private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
+ private final HashMap<Coverage, ArrayList<Hypothesis>> coverages;
- private final Sentence sentence;
- private final List<FeatureFunction> featureFunctions;
- private final JoshuaConfiguration config;
+ private Sentence sentence;
+ private JoshuaConfiguration config;
/* The list of states we've already visited. */
- private HashSet<Candidate> visitedStates;
+ private final HashSet<Candidate> visitedStates;
/* A list of candidates sorted for consideration for entry to the chart (for cube pruning) */
- private PriorityQueue<Candidate> candidates;
+ private final PriorityQueue<Candidate> candidates;
/* Short-circuits adding a cube-prune state more than once */
- private HashMap<Hypothesis, Hypothesis> deduper;
+ private final HashMap<Hypothesis, Hypothesis> deduper;
/**
* Create a new stack. Stacks are organized one for each number of source words that are covered.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index d3db223,f2fc6a7..5e878cb
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@@ -59,19 -60,12 +59,29 @@@ public class PhraseDecodingTest
decoder = null;
}
- @Test(enabled = false)
+ @Test(enabled = true)
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
- final String translation = decode(INPUT).toString();
- final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
- assertEquals(gold, translation);
+ final String translation = decode(INPUT).toString().trim();
+ final String gold = OUTPUT;
+ assertEquals(translation, gold);
+ }
+
+ @Test(enabled = false)
+ public void givenInput_whenPhraseDecodingWithAlignments_thenOutputHasAlignments() throws IOException {
+ final String translation = decode(INPUT).toString().trim();
+ final String gold = OUTPUT_WITH_ALIGNMENTS;
+ assertEquals(translation, gold);
+ }
++
++ @Test(enabled = true)
++ public void givenInput_whenPhraseDecoding_thenInputCanBeRetrieved() throws IOException {
++ String outputFormat = joshuaConfig.outputFormat;
++ joshuaConfig.outputFormat = "%e";
++ final String translation = decode(INPUT).toString().trim();
++ joshuaConfig.outputFormat = outputFormat;
++ final String gold = INPUT;
++ assertEquals(translation, gold);
+ }
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);