You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/22 21:11:58 UTC
[1/3] incubator-joshua git commit: Removed owner ids from Vocabulary.
These are now maintained in their own mapping. Fixes a bug with multiple
packed grammars that would overwrite each other's owner Vocab id. Also cleaned
up grammar constructors a little
Repository: incubator-joshua
Updated Branches:
refs/heads/master 2b5b4dc0d -> f3a511836
Removed owner ids from Vocabulary. These are now maintained in their own mapping. Fixes a bug with multiple packed grammars that would overwrite each other's owner Vocab id. Also cleaned up grammar constructors a little bit.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1011bbb0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1011bbb0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1011bbb0
Branch: refs/heads/master
Commit: 1011bbb03b29b57eb2903e4817a4d6a3d553354e
Parents: 8fc7544
Author: Felix Hieber <fh...@amazon.com>
Authored: Mon Jun 20 17:55:23 2016 +0200
Committer: Felix Hieber <fh...@amazon.com>
Committed: Tue Jun 21 12:12:18 2016 +0200
----------------------------------------------------------------------
.../java/org/apache/joshua/decoder/Decoder.java | 26 +++---
.../joshua/decoder/chart_parser/Chart.java | 2 +-
.../joshua/decoder/ff/ArityPhrasePenalty.java | 11 +--
.../joshua/decoder/ff/LexicalFeatures.java | 8 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 10 ++-
.../apache/joshua/decoder/ff/PhraseModel.java | 25 +++---
.../apache/joshua/decoder/ff/PhrasePenalty.java | 15 ++--
.../apache/joshua/decoder/ff/RuleCountBin.java | 7 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 9 ++-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 11 ++-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 30 +++----
.../apache/joshua/decoder/ff/tm/Grammar.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerId.java | 52 ++++++++++++
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 83 ++++++++++++++++++++
.../org/apache/joshua/decoder/ff/tm/Rule.java | 20 ++---
.../decoder/ff/tm/SentenceFilteredGrammar.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 43 ++++------
.../decoder/ff/tm/packed/PackedGrammar.java | 14 ++--
.../GrammarBuilderWalkerFunction.java | 3 +-
.../decoder/hypergraph/HyperGraphPruning.java | 2 +-
.../joshua/decoder/phrase/PhraseTable.java | 10 ++-
.../apache/joshua/decoder/phrase/Stacks.java | 4 +-
.../class_lm/ClassBasedLanguageModelTest.java | 3 +-
.../joshua/decoder/ff/tm/OwnerMapTest.java | 39 +++++++++
.../apache/joshua/system/AlignmentMapTest.java | 3 +-
25 files changed, 307 insertions(+), 127 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 097ce59..f319e40 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -19,6 +19,7 @@
package org.apache.joshua.decoder;
import static org.apache.joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.getOwner;
import java.io.BufferedWriter;
import java.io.File;
@@ -29,6 +30,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
@@ -41,6 +43,8 @@ import org.apache.joshua.decoder.ff.PhraseModel;
import org.apache.joshua.decoder.ff.StatefulFF;
import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
@@ -363,6 +367,7 @@ public class Decoder {
public static void resetGlobalState() {
// clear/reset static variables
+ OwnerMap.clear();
DENSE_FEATURE_NAMES.clear();
Vocabulary.clear();
Vocabulary.unregisterLanguageModels();
@@ -594,10 +599,6 @@ public class Decoder {
} else {
- int maxSourceLen = parsedArgs.containsKey("max-source-len")
- ? Integer.parseInt(parsedArgs.get("max-source-len"))
- : -1;
-
joshuaConfiguration.search_algorithm = "stack";
grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
}
@@ -609,8 +610,7 @@ public class Decoder {
} else {
LOG.warn("no grammars supplied! Supplying dummy glue grammar.");
- MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration);
- glueGrammar.setSpanLimit(-1);
+ MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration, -1);
glueGrammar.addGlueRules(featureFunctions);
this.grammars.add(glueGrammar);
}
@@ -619,14 +619,13 @@ public class Decoder {
if (joshuaConfiguration.search_algorithm.equals("stack"))
this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
else
- this.customPhraseTable = new MemoryBasedBatchGrammar("custom", joshuaConfiguration);
+ this.customPhraseTable = new MemoryBasedBatchGrammar("custom", joshuaConfiguration, 20);
this.grammars.add(this.customPhraseTable);
/* Create an epsilon-deleting grammar */
if (joshuaConfiguration.lattice_decoding) {
LOG.info("Creating an epsilon-deleting grammar");
- MemoryBasedBatchGrammar latticeGrammar = new MemoryBasedBatchGrammar("lattice", joshuaConfiguration);
- latticeGrammar.setSpanLimit(-1);
+ MemoryBasedBatchGrammar latticeGrammar = new MemoryBasedBatchGrammar("lattice", joshuaConfiguration, -1);
HieroFormatReader reader = new HieroFormatReader();
String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
@@ -644,13 +643,14 @@ public class Decoder {
}
/* Now create a feature function for each owner */
- HashSet<String> ownersSeen = new HashSet<String>();
+ final Set<OwnerId> ownersSeen = new HashSet<OwnerId>();
for (Grammar grammar: this.grammars) {
- String owner = Vocabulary.word(grammar.getOwner());
+ OwnerId owner = grammar.getOwner();
if (! ownersSeen.contains(owner)) {
- this.featureFunctions.add(new PhraseModel(weights, new String[] { "tm", "-owner", owner },
- joshuaConfiguration, grammar));
+ this.featureFunctions.add(
+ new PhraseModel(
+ weights, new String[] { "tm", "-owner", getOwner(owner) }, joshuaConfiguration, grammar));
ownersSeen.add(owner);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index d0cd96b..355a6f1 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -139,7 +139,7 @@ public class Chart {
for (int i = 0; i < grammars.length; i++)
this.grammars[i + 1] = grammars[i];
- MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config);
+ MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config, 20);
AbstractGrammar.addOOVRules(oovGrammar, sentence.getLattice(), featureFunctions,
this.config.true_oovs_only);
this.grammars[0] = oovGrammar;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
index ae273b7..f544f50 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -21,12 +21,13 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.decoder.chart_parser.SourcePath;
-import org.apache.joshua.corpus.Vocabulary;
/**
* This feature function counts rules from a particular grammar (identified by the owner) having an
@@ -39,14 +40,14 @@ import org.apache.joshua.corpus.Vocabulary;
public class ArityPhrasePenalty extends StatelessFF {
// when the rule.arity is in the range, then this feature is activated
- private final int owner;
+ private final OwnerId owner;
private final int minArity;
private final int maxArity;
public ArityPhrasePenalty(final FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "ArityPenalty", args, config);
- this.owner = Vocabulary.id(parsedArgs.get("owner"));
+ this.owner = OwnerMap.register(parsedArgs.get("owner"));
this.minArity = Integer.parseInt(parsedArgs.get("min-arity"));
this.maxArity = Integer.parseInt(parsedArgs.get("max-arity"));
}
@@ -55,7 +56,7 @@ public class ArityPhrasePenalty extends StatelessFF {
* Returns 1 if the arity penalty feature applies to the current rule.
*/
private int isEligible(final Rule rule) {
- if (this.owner == rule.getOwner() && rule.getArity() >= this.minArity
+ if (this.owner.equals(rule.getOwner()) && rule.getArity() >= this.minArity
&& rule.getArity() <= this.maxArity)
return 1;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
index 58de5f4..75158d0 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@ -27,6 +27,8 @@ import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -49,7 +51,7 @@ public class LexicalFeatures extends StatelessFF {
//whether this feature is restricted to a certain grammar/owner
private final boolean ownerRestriction;
// the grammar/owner this feature is restricted to fire
- private final int owner;
+ private final OwnerId owner;
// Strings separating words
private static final String SEPARATOR = "~";
@@ -59,7 +61,7 @@ public class LexicalFeatures extends StatelessFF {
super(weights, NAME, args, config);
ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
- owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
+ owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : OwnerMap.UNKNOWN_OWNER_ID;
useAlignments = parsedArgs.containsKey("alignments");
useDeletions = parsedArgs.containsKey("deletions");
@@ -77,7 +79,7 @@ public class LexicalFeatures extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (ownerRestriction && rule.getOwner() != owner) {
+ if (ownerRestriction && !rule.getOwner().equals(owner)) { // skip rules owned by a different grammar
return null;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 5278172..92ee740 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -25,6 +25,8 @@ import java.util.List;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -42,7 +44,7 @@ import org.apache.joshua.decoder.chart_parser.SourcePath;
* @author Matt Post post@cs.jhu.edu
*/
public class OOVPenalty extends StatelessFF {
- private final int ownerID;
+ private final OwnerId ownerID;
/* The default value returned for OOVs. Can be overridden with -oov-list */
private final float defaultValue = -100f;
@@ -51,7 +53,7 @@ public class OOVPenalty extends StatelessFF {
public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "OOVPenalty", args, config);
- ownerID = Vocabulary.id("oov");
+ ownerID = OwnerMap.register("oov");
oovWeights = new HashMap<Integer,Float>();
if (config.oovList != null) {
@@ -79,7 +81,7 @@ public class OOVPenalty extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (rule != null && this.ownerID == rule.getOwner()) {
+ if (rule != null && this.ownerID.equals(rule.getOwner())) {
acc.add(denseFeatureIndex, getValue(rule.getLHS()));
}
@@ -95,7 +97,7 @@ public class OOVPenalty extends StatelessFF {
*/
@Override
public float estimateCost(Rule rule, Sentence sentence) {
- if (rule != null && this.ownerID == rule.getOwner())
+ if (rule != null && this.ownerID.equals(rule.getOwner()))
return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
return 0.0f;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index 2324292..7ae3dbc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -21,11 +21,12 @@ package org.apache.joshua.decoder.ff;
import java.util.ArrayList;
import java.util.List;
-import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -44,28 +45,26 @@ import org.apache.joshua.decoder.segment_file.Sentence;
public class PhraseModel extends StatelessFF {
/* The owner of the grammar. */
- private int ownerID;
- private String owner;
+ private final OwnerId ownerID;
+ private final String owner;
private float[] phrase_weights = null;
public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config, Grammar g) {
super(weights, "tm_", args, config);
- String owner = parsedArgs.get("owner");
- this.name = String.format("tm_%s", owner);
+ // Store the owner and name
+ this.owner = parsedArgs.get("owner");
+ this.ownerID = OwnerMap.register(owner);
+ this.name = String.format("tm_%s", this.owner);
/*
* Determine the number of features by querying the example grammar that was passed in.
*/
phrase_weights = new float[g.getNumDenseFeatures()];
-// System.err.println(String.format("GOT %d FEATURES FOR %s", g.getNumDenseFeatures(), owner));
for (int i = 0; i < phrase_weights.length; i++)
phrase_weights[i] = weights.getSparse(String.format("tm_%s_%d", owner, i));
-
- // Store the owner.
- this.owner = owner;
- this.ownerID = Vocabulary.id(owner);
+
}
/**
@@ -88,7 +87,7 @@ public class PhraseModel extends StatelessFF {
@Override
public float estimateCost(final Rule rule, Sentence sentence) {
- if (rule != null && rule.getOwner() == ownerID) {
+ if (rule != null && rule.getOwner().equals(ownerID)) {
if (rule.getPrecomputableCost() <= Float.NEGATIVE_INFINITY)
rule.setPrecomputableCost(phrase_weights, weights);
@@ -105,7 +104,7 @@ public class PhraseModel extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (rule != null && rule.getOwner() == ownerID) {
+ if (rule != null && rule.getOwner().equals(ownerID)) {
/*
* Here, we peak at the Accumulator object. If it's asking for scores, then we don't bother to
* add each feature, but rather compute the inner product and add *that*. This is totally
@@ -130,6 +129,6 @@ public class PhraseModel extends StatelessFF {
}
public String toString() {
- return name + " " + Vocabulary.word(ownerID);
+ return name;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 3c38e60..9eecd0c 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -19,12 +19,13 @@
package org.apache.joshua.decoder.ff;
import java.util.ArrayList;
-import java.util.List;
+import java.util.List;
-import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.phrase.Hypothesis;
@@ -42,15 +43,15 @@ import org.apache.joshua.decoder.segment_file.Sentence;
*/
public class PhrasePenalty extends StatelessFF {
- private int owner = 0;
+ private final OwnerId owner;
private float value = 1.0f;
public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "PhrasePenalty", args, config);
if (parsedArgs.containsKey("owner"))
- this.owner = Vocabulary.id(parsedArgs.get("owner"));
+ this.owner = OwnerMap.register(parsedArgs.get("owner"));
else // default
- this.owner = Vocabulary.id("pt");
+ this.owner = OwnerMap.register("pt");
}
@Override
@@ -58,7 +59,7 @@ public class PhrasePenalty extends StatelessFF {
Sentence sentence, Accumulator acc) {
if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE
- && (owner == 0 || rule.getOwner() == owner))
+ && (rule.getOwner().equals(owner)))
acc.add(denseFeatureIndex, value);
return null;
@@ -79,7 +80,7 @@ public class PhrasePenalty extends StatelessFF {
@Override
public float estimateCost(Rule rule, Sentence sentence) {
if (rule != null && rule != Hypothesis.BEGIN_RULE && rule != Hypothesis.END_RULE
- && (owner == 0 || rule.getOwner() == owner))
+ && (rule.getOwner().equals(owner)))
return weights.getDense(denseFeatureIndex) * value;
return 0.0f;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java b/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
index 5ba0c66..3ffbf65 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
@@ -20,10 +20,11 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -38,9 +39,11 @@ public class RuleCountBin extends StatelessFF {
private static final Logger LOG = LoggerFactory.getLogger(RuleCountBin.class);
private int field = -1;
+ private final OwnerId owner;
public RuleCountBin(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "RuleCountBin", args, config);
+ owner = OwnerMap.register("pt");
field = Integer.parseInt(parsedArgs.get("field"));
}
@@ -49,7 +52,7 @@ public class RuleCountBin extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (rule.getOwner() != Vocabulary.id("pt"))
+ if (!rule.getOwner().equals(owner)) // feature applies only to rules owned by "pt"
return null;
float rarityPenalty = -rule.getFeatureVector().getSparse(String.format("tm_pt_%d", field));
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
index 909e481..308d38a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@ -19,6 +19,7 @@
package org.apache.joshua.decoder.ff;
import static com.google.common.cache.CacheBuilder.newBuilder;
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER_ID;
import java.util.List;
@@ -26,6 +27,8 @@ import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -47,7 +50,7 @@ public class RuleFF extends StatelessFF {
// whether this feature is restricted to a certain grammar/owner
private final boolean ownerRestriction;
// the grammar/owner this feature is restricted to fire
- private final int owner;
+ private final OwnerId owner;
// what part of the rule should be extracted;
private final Sides sides;
// Strings separating words and rule sides
@@ -60,7 +63,7 @@ public class RuleFF extends StatelessFF {
super(weights, NAME, args, config);
ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
- owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
+ owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : UNKNOWN_OWNER_ID;
if (parsedArgs.containsKey("sides")) {
final String sideValue = parsedArgs.get("sides");
@@ -89,7 +92,7 @@ public class RuleFF extends StatelessFF {
public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
Sentence sentence, Accumulator acc) {
- if (ownerRestriction && rule.getOwner() != owner) {
+ if (ownerRestriction && !rule.getOwner().equals(owner)) {
return null;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index fa4c4af..861cf35 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -31,6 +31,8 @@ import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.StatefulFF;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
import org.apache.joshua.decoder.hypergraph.HGNode;
@@ -304,10 +306,11 @@ public class FragmentLMFF extends StatefulFF {
.parseLine("[SBAR] ||| that he was done ||| that he was done ||| 0");
Rule rulePERIOD = new HieroFormatReader().parseLine("[.] ||| . ||| . ||| 0");
- ruleS.setOwner(0);
- ruleVP.setOwner(0);
- ruleSBAR.setOwner(0);
- rulePERIOD.setOwner(0);
+ final OwnerId owner = OwnerMap.register("0");
+ ruleS.setOwner(owner);
+ ruleVP.setOwner(owner);
+ ruleSBAR.setOwner(owner);
+ rulePERIOD.setOwner(owner);
HyperEdge edgeSBAR = new HyperEdge(ruleSBAR, 0.0f, 0.0f, null, (SourcePath) null);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
index 5a5d02b..8f7a773 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
@@ -29,11 +29,11 @@ import org.apache.joshua.decoder.segment_file.Token;
import org.apache.joshua.lattice.Arc;
import org.apache.joshua.lattice.Lattice;
import org.apache.joshua.lattice.Node;
-
-import cern.colt.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import cern.colt.Arrays;
+
/**
* Partial implementation of the <code>Grammar</code> interface that provides logic for sorting a
* grammar.
@@ -60,7 +60,7 @@ public abstract class AbstractGrammar implements Grammar {
* The grammar's owner, used to determine which weights are applicable to the dense features found
* within.
*/
- protected int owner = -1;
+ protected final OwnerId owner;
/*
* The maximum length of a source-side phrase. Mostly used by the phrase-based decoder.
@@ -78,29 +78,29 @@ public abstract class AbstractGrammar implements Grammar {
}
@Override
- public int getOwner() {
+ public OwnerId getOwner() {
return owner;
}
+
+ public int getSpanLimit() {
+ return spanLimit;
+ }
- /* The maximum span of the input this rule can be applied to. */
- protected int spanLimit = 1;
+ /* The maximum span of the input this grammar's rules can be applied to. */
+ protected final int spanLimit;
- protected JoshuaConfiguration joshuaConfiguration;
+ protected final JoshuaConfiguration joshuaConfiguration;
/**
- * Constructs an empty, unsorted grammar.
+ * Creates an empty, unsorted grammar with given owner and spanlimit
*
* @see Grammar#isSorted()
* @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
*/
- public AbstractGrammar(JoshuaConfiguration config) {
- this.joshuaConfiguration = config;
+ public AbstractGrammar(final String owner, final JoshuaConfiguration config, final int spanLimit) {
this.sorted = false;
- }
-
- public AbstractGrammar(int owner, int spanLimit) {
- this.sorted = false;
- this.owner = owner;
+ this.owner = OwnerMap.register(owner);
+ this.joshuaConfiguration = config;
this.spanLimit = spanLimit;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
index 06252a1..8f90d1b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
@@ -95,7 +95,7 @@ public interface Grammar {
* Return the grammar's owner.
* @return grammar owner
*/
- int getOwner();
+ OwnerId getOwner();
/**
* Return the maximum source phrase length (terminals + nonterminals)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerId.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerId.java b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerId.java
new file mode 100644
index 0000000..2b9e15b
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerId.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.tm;
+
+import java.util.Objects;
+
+/**
+ * OwnerId wraps an Integer but is strongly typed, so that ints representing an
+ * owner ID cannot be confused with ints that are just plain numbers. Equality
+ * and hash code are derived solely from the wrapped value.
+ */
+public class OwnerId {
+
+ private final Integer value;
+
+ OwnerId(final int value) { // no access modifier: only same-package code can construct ids
+ this.value = value;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hashCode(value);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass()) // exact class match, not instanceof
+ return false;
+ OwnerId other = (OwnerId) obj;
+ return Objects.equals(this.value, other.value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
new file mode 100644
index 0000000..5838620
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.tm;
+
+import java.util.concurrent.locks.StampedLock;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+
+/**
+ * OwnerMap maintains a decoder-wide mapping between 'owner' strings and
+ * corresponding IDs, typed as {@link OwnerId}. Using this more strongly typed
+ * mapping, we can separate word IDs in
+ * {@link org.apache.joshua.corpus.Vocabulary} from {@link OwnerId}s. For
+ * example, this prevents packed grammars from overwriting the owner mappings
+ * of previously loaded packed grammars.
+ * <p>
+ * Every access to the underlying map goes through one {@link StampedLock}:
+ * {@link #getOwner(OwnerId)} takes the read lock, {@link #register(String)}
+ * and {@link #clear()} take the write lock.
+ *
+ * @author fhieber
+ *
+ */
+public class OwnerMap {
+
+  // bi-directional mapping between OwnerId and Owner strings
+  private static final BiMap<OwnerId, String> map = HashBiMap.create();
+
+  public static final OwnerId UNKNOWN_OWNER_ID = new OwnerId(0);
+  public static final String UNKNOWN_OWNER = "<unowned>";
+
+  // Guards all reads and writes of 'map'. Declared before the static block
+  // below because that block calls clear(), which acquires this lock.
+  private static final StampedLock lock = new StampedLock();
+
+  static {
+    clear();
+  }
+
+  /**
+   * Register or get the OwnerId for the given ownerString. If ownerString is
+   * already mapped, its existing id is returned; otherwise a new id equal to
+   * the current number of mappings is created and stored.
+   * <p>
+   * Holds the write lock for the whole check-then-insert, so that concurrent
+   * {@link #getOwner(OwnerId)} calls never observe the map mid-update.
+   *
+   * @param ownerString the owner name to register
+   * @return the id mapped to ownerString
+   */
+  public static OwnerId register(String ownerString) {
+    final long stamp = lock.writeLock();
+    try {
+      // ids stored in the map are never null, so null means "not registered"
+      final OwnerId existingId = map.inverse().get(ownerString);
+      if (existingId != null) {
+        return existingId;
+      }
+
+      final OwnerId newId = new OwnerId(map.size());
+      map.put(newId, ownerString);
+      return newId;
+    } finally {
+      lock.unlockWrite(stamp);
+    }
+  }
+
+  /**
+   * Looks up the owner string for the given id.
+   *
+   * @param id the id to resolve
+   * @return the owner string mapped to id
+   * @throws IllegalArgumentException if no mapping exists for id
+   */
+  public static String getOwner(final OwnerId id) {
+    final long stamp = lock.readLock();
+    try {
+      if (!map.containsKey(id)) {
+        throw new IllegalArgumentException(
+            String.format("OwnerMap does not contain mapping for %s", id));
+      }
+      return map.get(id);
+    } finally {
+      lock.unlockRead(stamp);
+    }
+  }
+
+  /**
+   * Removes all mappings and re-installs the mapping from
+   * {@link #UNKNOWN_OWNER_ID} to {@link #UNKNOWN_OWNER}.
+   */
+  public static void clear() {
+    final long stamp = lock.writeLock();
+    try {
+      map.clear();
+      map.put(UNKNOWN_OWNER_ID, UNKNOWN_OWNER);
+    } finally {
+      lock.unlockWrite(stamp);
+    }
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 0e5a4bc..15fbec1 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -18,6 +18,8 @@
*/
package org.apache.joshua.decoder.ff.tm;
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER_ID;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@@ -66,7 +68,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* a feature function will be fired for this rule only if the owner of the rule matches the owner
* of the feature function
*/
- private int owner = -1;
+ private OwnerId owner = UNKNOWN_OWNER_ID;
/**
* This is the cost computed only from the features present with the grammar rule. This cost is
@@ -99,7 +101,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* @param arity Number of nonterminals in the source language right-hand side.
* @param owner todo
*/
- public Rule(int lhs, int[] source, int[] target, String sparseFeatures, int arity, int owner) {
+ public Rule(int lhs, int[] source, int[] target, String sparseFeatures, int arity, OwnerId owner) {
this.lhs = lhs;
this.source = source;
this.arity = arity;
@@ -119,7 +121,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* @param arity todo
* @param owner todo
*/
- public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, int owner) {
+ public Rule(int lhs, int[] sourceRhs, int[] targetRhs, FeatureVector features, int arity, OwnerId owner) {
this.lhs = lhs;
this.source = sourceRhs;
this.arity = arity;
@@ -132,7 +134,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
/**
* Constructor used for SamtFormatReader and GrammarBuilderWalkerFunction's getRuleWithSpans()
- * Owner set to -1
+ * Rule is unowned.
* @param lhs todo
* @param sourceRhs todo
* @param targetRhs todo
@@ -140,7 +142,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* @param arity todo
*/
public Rule(int lhs, int[] sourceRhs, int[] targetRhs, String sparseFeatures, int arity) {
- this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, -1);
+ this(lhs, sourceRhs, targetRhs, sparseFeatures, arity, OwnerMap.UNKNOWN_OWNER_ID);
}
/**
@@ -191,8 +193,8 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
*/
private Supplier<FeatureVector> initializeFeatureSupplierFromString(){
return Suppliers.memoize(() ->{
- if (owner != -1) {
- return new FeatureVector(getFeatureString(), "tm_" + Vocabulary.word(owner) + "_");
+ if (!owner.equals(UNKNOWN_OWNER_ID)) {
+ return new FeatureVector(getFeatureString(), "tm_" + OwnerMap.getOwner(owner) + "_");
} else {
return new FeatureVector();
}
@@ -264,11 +266,11 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
return this.arity;
}
- public void setOwner(int owner) {
+ public void setOwner(final OwnerId owner) {
this.owner = owner;
}
- public int getOwner() {
+ public OwnerId getOwner() {
return this.owner;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
index c952b05..54f68b2 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
@@ -53,7 +53,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
* @param sentence {@link org.apache.joshua.lattice.Lattice} input
*/
SentenceFilteredGrammar(AbstractGrammar baseGrammar, Sentence sentence) {
- super(baseGrammar.joshuaConfiguration);
+ super(OwnerMap.getOwner(baseGrammar.getOwner()), baseGrammar.joshuaConfiguration, baseGrammar.getSpanLimit());
this.baseGrammar = baseGrammar;
this.sentence = sentence;
this.tokens = sentence.getWordIDs();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index f346e7a..365102b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -28,6 +28,7 @@ import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.GrammarReader;
import org.apache.joshua.decoder.ff.tm.Trie;
@@ -64,7 +65,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
private int numDenseFeatures = 0;
/* The trie root. */
- private MemoryBasedTrie root = null;
+ private MemoryBasedTrie root = new MemoryBasedTrie();
/* The file containing the grammar. */
private String grammarFile;
@@ -79,33 +80,28 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
// Constructors
// ===============================================================
- public MemoryBasedBatchGrammar(JoshuaConfiguration joshuaConfiguration) {
- super(joshuaConfiguration);
- this.root = new MemoryBasedTrie();
- this.joshuaConfiguration = joshuaConfiguration;
- setSpanLimit(20);
- }
-
- public MemoryBasedBatchGrammar(String owner, JoshuaConfiguration joshuaConfiguration) {
- this(joshuaConfiguration);
- this.owner = Vocabulary.id(owner);
+ /**
+ * Constructor used by Decoder mostly; the span limit is supplied by the caller.
+ */
+ public MemoryBasedBatchGrammar(String owner, JoshuaConfiguration config, int spanLimit) {
+ super(owner, config, spanLimit);
}
- public MemoryBasedBatchGrammar(GrammarReader<Rule> gr, JoshuaConfiguration joshuaConfiguration) {
- // this.defaultOwner = Vocabulary.id(defaultOwner);
- // this.defaultLHS = Vocabulary.id(defaultLHSSymbol);
- this(joshuaConfiguration);
- modelReader = gr;
+ /**
+ * Constructor to initialize a GrammarReader (unowned)
+ */
+ public MemoryBasedBatchGrammar(
+ final GrammarReader<Rule> reader, final JoshuaConfiguration config, final int spanLimit) {
+ super(OwnerMap.UNKNOWN_OWNER, config, spanLimit);
+ modelReader = reader;
}
public MemoryBasedBatchGrammar(String formatKeyword, String grammarFile, String owner,
String defaultLHSSymbol, int spanLimit, JoshuaConfiguration joshuaConfiguration)
throws IOException {
- this(joshuaConfiguration);
- this.owner = Vocabulary.id(owner);
+ super(owner, joshuaConfiguration, spanLimit);
Vocabulary.id(defaultLHSSymbol);
- this.spanLimit = spanLimit;
this.grammarFile = grammarFile;
// ==== loading grammar
@@ -141,10 +137,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
// Methods
// ===============================================================
- public void setSpanLimit(int spanLimit) {
- this.spanLimit = spanLimit;
- }
-
@Override
public int getNumRules() {
return this.qtyRulesRead;
@@ -172,13 +164,8 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
*/
public void addRule(Rule rule) {
- // TODO: Why two increments?
this.qtyRulesRead++;
- // if (owner == -1) {
- // System.err.println("* FATAL: MemoryBasedBatchGrammar::addRule(): owner not set for grammar");
- // System.exit(1);
- // }
rule.setOwner(owner);
if (numDenseFeatures == 0)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index b48685d..37bffb7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -19,7 +19,7 @@
package org.apache.joshua.decoder.ff.tm.packed;
/***
- * This package implements Joshua's packed grammar structure, which enables the efficient loading
+ * This package implements Joshua's packed grammar structure, which enables the efficient loading
* and accessing of grammars. It is described in the paper:
*
* @article{ganitkevitch2012joshua,
@@ -85,6 +85,7 @@ import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
import org.apache.joshua.decoder.ff.tm.BasicRuleCollection;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.RuleCollection;
import org.apache.joshua.decoder.ff.tm.Trie;
@@ -98,6 +99,7 @@ import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
+
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -123,10 +125,9 @@ public class PackedGrammar extends AbstractGrammar {
public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
JoshuaConfiguration joshuaConfiguration) throws IOException {
- super(joshuaConfiguration);
+ super(owner, joshuaConfiguration, span_limit);
this.grammarDir = grammar_dir;
- this.spanLimit = span_limit;
// Read the vocabulary.
vocabFile = new File(grammar_dir + File.separator + VOCABULARY_FILENAME);
@@ -147,9 +148,6 @@ public class PackedGrammar extends AbstractGrammar {
encoding = new EncoderConfiguration();
encoding.load(grammar_dir + File.separator + "encoding");
- // Set phrase owner.
- this.owner = Vocabulary.id(owner);
-
final List<String> listing = Arrays.asList(new File(grammar_dir).list());
sort(listing); // File.list() has arbitrary sort order
slices = new ArrayList<PackedSlice>();
@@ -940,11 +938,11 @@ public class PackedGrammar extends AbstractGrammar {
}
@Override
- public void setOwner(int ow) {
+ public void setOwner(OwnerId owner) {
}
@Override
- public int getOwner() {
+ public OwnerId getOwner() {
return owner;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index a6edddd..c5d2398 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -55,8 +55,7 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
private HashSet<Rule> rules;
public GrammarBuilderWalkerFunction(String goal,JoshuaConfiguration joshuaConfiguration) {
- grammar = new MemoryBasedBatchGrammar(reader,joshuaConfiguration);
- grammar.setSpanLimit(1000);
+ grammar = new MemoryBasedBatchGrammar(reader, joshuaConfiguration, 1000);
outStream = null;
goalSymbol = Vocabulary.id(goal);
rules = new HashSet<Rule>();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
index 27f5525..51bd9d6 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
@@ -164,7 +164,7 @@ public class HyperGraphPruning extends TrivialInsideOutside {
double postLogProb = getEdgeUnormalizedPosteriorLogProb(dt, parent);
- if (dt.getRule() != null && dt.getRule().getOwner() == glueGrammarOwner
+ if (dt.getRule() != null && dt.getRule().getOwner().equals(glueGrammarOwner)
&& dt.getRule().getArity() == 2) { // specicial rule: S->S X
// TODO
return (postLogProb - this.bestLogProb < THRESHOLD_GLUE);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 27f92ac..0e03dba 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -18,6 +18,8 @@
*/
package org.apache.joshua.decoder.phrase;
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
+
import java.io.File;
import java.io.IOException;
import java.util.List;
@@ -26,6 +28,7 @@ import org.apache.joshua.corpus.Vocabulary;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.ff.tm.OwnerId;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.RuleCollection;
import org.apache.joshua.decoder.ff.tm.Trie;
@@ -73,8 +76,7 @@ public class PhraseTable implements Grammar {
public PhraseTable(String owner, JoshuaConfiguration config) {
this.config = config;
-
- this.backend = new MemoryBasedBatchGrammar(owner, config);
+ this.backend = new MemoryBasedBatchGrammar(owner, config, 20);
}
/**
@@ -129,7 +131,7 @@ public class PhraseTable implements Grammar {
int nt_i = Vocabulary.id("[X]");
Rule oovRule = new Rule(nt_i, new int[] { nt_i, sourceWord },
- new int[] { -1, targetWord }, "", 1, null);
+ new int[] { -1, targetWord }, "", 1, UNKNOWN_OWNER);
addRule(oovRule);
oovRule.estimateRuleCost(featureFunctions);
@@ -170,7 +172,7 @@ public class PhraseTable implements Grammar {
}
@Override
- public int getOwner() {
+ public OwnerId getOwner() {
return backend.getOwner();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index c688b2c..8c092ec 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -35,6 +35,8 @@ package org.apache.joshua.decoder.phrase;
* TODO Lattice decoding is not yet supported (March 2015).
*/
+import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
+
import java.util.ArrayList;
import java.util.List;
@@ -96,7 +98,7 @@ public class Stacks {
if (grammars[i] instanceof PhraseTable)
phraseTables[j++] = (PhraseTable) grammars[i];
- phraseTables[phraseTables.length - 2] = new PhraseTable("null", config);
+ phraseTables[phraseTables.length - 2] = new PhraseTable(UNKNOWN_OWNER, config);
phraseTables[phraseTables.length - 2].addRule(Hypothesis.END_RULE);
phraseTables[phraseTables.length - 1] = new PhraseTable("oov", config);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
index 7207d80..b0952be 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
@@ -26,6 +26,7 @@ import org.apache.joshua.decoder.Decoder;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
+import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
@@ -65,7 +66,7 @@ public class ClassBasedLanguageModelTest {
@Test
public void givenRuleWithSingleWord_whenGetRuleId_thenIsMappedToClass() {
final int[] target = Vocabulary.addAll(new String[] { "professionalism" });
- final Rule rule = new Rule(0, null, target, "", 0, 0);
+ final Rule rule = new Rule(0, null, target, new FeatureVector(), 0, OwnerMap.register(OwnerMap.UNKNOWN_OWNER));
assertEquals(Vocabulary.word(ff.getRuleIds(rule)[0]), "13");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java b/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
new file mode 100644
index 0000000..8d129e1
--- /dev/null
+++ b/src/test/java/org/apache/joshua/decoder/ff/tm/OwnerMapTest.java
@@ -0,0 +1,39 @@
+package org.apache.joshua.decoder.ff.tm;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+public class OwnerMapTest {
+
+  // OwnerMap holds static state, so it is reset around every test method to
+  // keep registrations from leaking between tests.
+  @BeforeMethod
+  public void setUp() throws Exception {
+    OwnerMap.clear();
+  }
+
+  @AfterMethod
+  public void tearDown() throws Exception {
+    OwnerMap.clear();
+  }
+
+  @Test(expectedExceptions = IllegalArgumentException.class)
+  public void given_invalidId_thenThrowsException() {
+    // nothing has been registered since clear(), so id 3 has no mapping
+    OwnerMap.getOwner(new OwnerId(3));
+  }
+
+  @Test
+  public void givenOwner_whenRegisteringOwner_thenMappingIsCorrect() {
+    // GIVEN
+    String owner = "owner";
+
+    // WHEN
+    OwnerId id = OwnerMap.register(owner);
+    OwnerId id2 = OwnerMap.register(owner);
+
+    // THEN
+    // TestNG's assertEquals takes (actual, expected)
+    assertEquals(id2, id);
+    assertEquals(OwnerMap.getOwner(id), owner);
+  }
+
+  @Test
+  public void givenClearedMap_whenRegisteringUnknownOwner_thenUnknownOwnerIdIsReturned() {
+    // clear() re-installs the unknown-owner mapping, so registering the
+    // unknown owner string must hand back the existing id, not a new one
+    assertEquals(OwnerMap.register(OwnerMap.UNKNOWN_OWNER), OwnerMap.UNKNOWN_OWNER_ID);
+    assertEquals(OwnerMap.getOwner(OwnerMap.UNKNOWN_OWNER_ID), OwnerMap.UNKNOWN_OWNER);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1011bbb0/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/AlignmentMapTest.java b/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
index eba732a..a28edc7 100644
--- a/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
+++ b/src/test/java/org/apache/joshua/system/AlignmentMapTest.java
@@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import org.apache.joshua.corpus.Vocabulary;
+import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.junit.Before;
@@ -51,7 +52,7 @@ public class AlignmentMapTest {
expectedAlignmentMap.put(1, Arrays.asList(3));
expectedAlignmentMap.put(3, Arrays.asList(3));
rule1 = new Rule(-1, sourceRhs, targetRhs, "", arity, alignment);
- rule2 = new Rule(-1, sourceRhs, targetRhs, "", arity, null); // rule with no alignment
+ rule2 = new Rule(-1, sourceRhs, targetRhs, new FeatureVector(), arity, null); // rule with no alignment
}
@Test
[3/3] incubator-joshua git commit: Merge branch 'fhieber-owner'
Posted by mj...@apache.org.
Merge branch 'fhieber-owner'
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f3a51183
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f3a51183
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f3a51183
Branch: refs/heads/master
Commit: f3a511836d139f270ce2ffaeefd39090b3cbb826
Parents: 2b5b4dc fd8867d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Jun 22 17:11:52 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Jun 22 17:11:52 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/joshua/decoder/Decoder.java | 26 +++---
.../joshua/decoder/chart_parser/Chart.java | 2 +-
.../joshua/decoder/ff/ArityPhrasePenalty.java | 11 +--
.../joshua/decoder/ff/LexicalFeatures.java | 8 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 10 ++-
.../apache/joshua/decoder/ff/PhraseModel.java | 25 +++---
.../apache/joshua/decoder/ff/PhrasePenalty.java | 15 ++--
.../apache/joshua/decoder/ff/RuleCountBin.java | 7 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 9 ++-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 11 ++-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 30 +++----
.../apache/joshua/decoder/ff/tm/Grammar.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerId.java | 52 ++++++++++++
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 83 ++++++++++++++++++++
.../org/apache/joshua/decoder/ff/tm/Rule.java | 20 ++---
.../decoder/ff/tm/SentenceFilteredGrammar.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 43 ++++------
.../decoder/ff/tm/packed/PackedGrammar.java | 14 ++--
.../GrammarBuilderWalkerFunction.java | 3 +-
.../decoder/hypergraph/HyperGraphPruning.java | 2 +-
.../joshua/decoder/phrase/PhraseTable.java | 10 ++-
.../apache/joshua/decoder/phrase/Stacks.java | 4 +-
.../class_lm/ClassBasedLanguageModelTest.java | 3 +-
.../joshua/decoder/ff/tm/OwnerMapTest.java | 39 +++++++++
.../apache/joshua/system/AlignmentMapTest.java | 3 +-
25 files changed, 307 insertions(+), 127 deletions(-)
----------------------------------------------------------------------
[2/3] incubator-joshua git commit: Merge branch 'owner' of
github.com:fhieber/incubator-joshua into fhieber-owner
Posted by mj...@apache.org.
Merge branch 'owner' of github.com:fhieber/incubator-joshua into fhieber-owner
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/fd8867d4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/fd8867d4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/fd8867d4
Branch: refs/heads/master
Commit: fd8867d4d94324d0570899e736b4b8f631e95a1a
Parents: 2b5b4dc 1011bbb
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Jun 22 17:06:17 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Jun 22 17:06:17 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/joshua/decoder/Decoder.java | 26 +++---
.../joshua/decoder/chart_parser/Chart.java | 2 +-
.../joshua/decoder/ff/ArityPhrasePenalty.java | 11 +--
.../joshua/decoder/ff/LexicalFeatures.java | 8 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 10 ++-
.../apache/joshua/decoder/ff/PhraseModel.java | 25 +++---
.../apache/joshua/decoder/ff/PhrasePenalty.java | 15 ++--
.../apache/joshua/decoder/ff/RuleCountBin.java | 7 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 9 ++-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 11 ++-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 30 +++----
.../apache/joshua/decoder/ff/tm/Grammar.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerId.java | 52 ++++++++++++
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 83 ++++++++++++++++++++
.../org/apache/joshua/decoder/ff/tm/Rule.java | 20 ++---
.../decoder/ff/tm/SentenceFilteredGrammar.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 43 ++++------
.../decoder/ff/tm/packed/PackedGrammar.java | 14 ++--
.../GrammarBuilderWalkerFunction.java | 3 +-
.../decoder/hypergraph/HyperGraphPruning.java | 2 +-
.../joshua/decoder/phrase/PhraseTable.java | 10 ++-
.../apache/joshua/decoder/phrase/Stacks.java | 4 +-
.../class_lm/ClassBasedLanguageModelTest.java | 3 +-
.../joshua/decoder/ff/tm/OwnerMapTest.java | 39 +++++++++
.../apache/joshua/system/AlignmentMapTest.java | 3 +-
25 files changed, 307 insertions(+), 127 deletions(-)
----------------------------------------------------------------------