You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:52:21 UTC
[86/94] [abbrv] incubator-joshua git commit: moved misplaced file
moved misplaced file
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/e57320f2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/e57320f2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/e57320f2
Branch: refs/heads/master
Commit: e57320f25f9d91203d659ec3b25b4928c6ceaad2
Parents: 8793c45
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue May 31 22:07:05 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue May 31 22:07:05 2016 -0400
----------------------------------------------------------------------
src/joshua/decoder/ff/LexicalFeatures.java | 131 -------------------
.../joshua/decoder/ff/LexicalFeatures.java | 131 +++++++++++++++++++
2 files changed, 131 insertions(+), 131 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e57320f2/src/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/LexicalFeatures.java b/src/joshua/decoder/ff/LexicalFeatures.java
deleted file mode 100644
index 128df87..0000000
--- a/src/joshua/decoder/ff/LexicalFeatures.java
+++ /dev/null
@@ -1,131 +0,0 @@
-package joshua.decoder.ff;
-
-import static com.google.common.cache.CacheBuilder.newBuilder;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.chart_parser.SourcePath;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.segment_file.Sentence;
-
-import com.google.common.cache.Cache;
-
-/**
- * Lexical alignment features denoting alignments, deletions, and insertions.
- */
-public class LexicalFeatures extends StatelessFF {
-
- private final boolean useAlignments;
- private final boolean useDeletions;
- private final boolean useInsertions;
-
- private static final String NAME = "LexicalFeatures";
- // value to fire for features
- private static final int VALUE = 1;
- //whether this feature is restricted to a certain grammar/owner
- private final boolean ownerRestriction;
- // the grammar/owner this feature is restricted to fire
- private final int owner;
- // Strings separating words
- private static final String SEPARATOR = "~";
-
- private final Cache<Rule, List<String>> featureCache;
-
- public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, NAME, args, config);
-
- ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
- owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
-
- useAlignments = parsedArgs.containsKey("alignments");
- useDeletions = parsedArgs.containsKey("deletions");
- useInsertions = parsedArgs.containsKey("insertions");
-
- // initialize cache
- if (parsedArgs.containsKey("cacheSize")) {
- featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
- } else {
- featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
- }
- }
-
- @Override
- public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
- Sentence sentence, Accumulator acc) {
-
- if (ownerRestriction && rule.getOwner() != owner) {
- return null;
- }
-
- List<String> featureNames = featureCache.getIfPresent(rule);
- if (featureNames == null) {
- featureNames = getFeatures(rule);
- featureCache.put(rule, featureNames);
- }
- for (String feature : featureNames) {
- acc.add(feature, VALUE);
- }
-
- return null;
- }
-
- /**
- * Obtains the feature ids for the given rule.
- * @param rule
- * @return String representing the feature name.s
- */
- private List<String> getFeatures(final Rule rule) {
- final List<String> result = new ArrayList<>();
-
- byte[] alignments = rule.getAlignment();
- if (alignments == null) {
- return result;
- }
- int[] sourceWords = rule.getFrench();
- int[] targetWords = rule.getEnglish();
-
- // sourceAligned & targetAligned indicate whether an index is covered by alignments
- boolean[] sourceAligned = new boolean[sourceWords.length];
- boolean[] targetAligned = new boolean[targetWords.length];
-
- // translations: aligned words
- for (int i = 0; i < alignments.length; i+=2) {
- byte sourceIndex = alignments[i];
- byte targetIndex = alignments[i + 1];
- sourceAligned[sourceIndex] = true;
- targetAligned[targetIndex] = true;
- if (useAlignments) {
- result.add(
- "T:" +
- Vocabulary.word(sourceWords[sourceIndex]) +
- SEPARATOR +
- Vocabulary.word(targetWords[targetIndex]));
- }
- }
-
- // deletions: unaligned source words
- if (useDeletions) {
- for (int i = 0; i < sourceAligned.length; i++) {
- if (!sourceAligned[i] && !Vocabulary.nt(sourceWords[i])) {
- result.add("D:" + Vocabulary.word(sourceWords[i]));
- }
- }
- }
-
- // insertions: unaligned target words
- if (useInsertions) {
- for (int i = 0; i < targetAligned.length; i++) {
- if (useInsertions && !targetAligned[i] && !Vocabulary.nt(targetWords[i])) {
- result.add("I:" + Vocabulary.word(targetWords[i]));
- }
- }
- }
-
- return result;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e57320f2/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
new file mode 100644
index 0000000..128df87
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@ -0,0 +1,131 @@
+package joshua.decoder.ff;
+
+import static com.google.common.cache.CacheBuilder.newBuilder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import joshua.corpus.Vocabulary;
+import joshua.decoder.JoshuaConfiguration;
+import joshua.decoder.chart_parser.SourcePath;
+import joshua.decoder.ff.state_maintenance.DPState;
+import joshua.decoder.ff.tm.Rule;
+import joshua.decoder.hypergraph.HGNode;
+import joshua.decoder.segment_file.Sentence;
+
+import com.google.common.cache.Cache;
+
+/**
+ * Lexical alignment features denoting alignments, deletions, and insertions.
+ */
+public class LexicalFeatures extends StatelessFF {
+
+ private final boolean useAlignments;
+ private final boolean useDeletions;
+ private final boolean useInsertions;
+
+ private static final String NAME = "LexicalFeatures";
+ // value to fire for features
+ private static final int VALUE = 1;
+ //whether this feature is restricted to a certain grammar/owner
+ private final boolean ownerRestriction;
+ // the grammar/owner this feature is restricted to fire
+ private final int owner;
+ // Strings separating words
+ private static final String SEPARATOR = "~";
+
+ private final Cache<Rule, List<String>> featureCache;
+
+ public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
+ super(weights, NAME, args, config);
+
+ ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
+ owner = ownerRestriction ? Vocabulary.id(parsedArgs.get("owner")) : 0;
+
+ useAlignments = parsedArgs.containsKey("alignments");
+ useDeletions = parsedArgs.containsKey("deletions");
+ useInsertions = parsedArgs.containsKey("insertions");
+
+ // initialize cache
+ if (parsedArgs.containsKey("cacheSize")) {
+ featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
+ } else {
+ featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
+ }
+ }
+
+ @Override
+ public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
+ Sentence sentence, Accumulator acc) {
+
+ if (ownerRestriction && rule.getOwner() != owner) {
+ return null;
+ }
+
+ List<String> featureNames = featureCache.getIfPresent(rule);
+ if (featureNames == null) {
+ featureNames = getFeatures(rule);
+ featureCache.put(rule, featureNames);
+ }
+ for (String feature : featureNames) {
+ acc.add(feature, VALUE);
+ }
+
+ return null;
+ }
+
+ /**
+ * Obtains the feature ids for the given rule.
+ * @param rule
+ * @return List of Strings representing the feature names.
+ */
+ private List<String> getFeatures(final Rule rule) {
+ final List<String> result = new ArrayList<>();
+
+ byte[] alignments = rule.getAlignment();
+ if (alignments == null) {
+ return result;
+ }
+ int[] sourceWords = rule.getFrench();
+ int[] targetWords = rule.getEnglish();
+
+ // sourceAligned & targetAligned indicate whether an index is covered by alignments
+ boolean[] sourceAligned = new boolean[sourceWords.length];
+ boolean[] targetAligned = new boolean[targetWords.length];
+
+ // translations: aligned words
+ for (int i = 0; i < alignments.length; i+=2) {
+ byte sourceIndex = alignments[i];
+ byte targetIndex = alignments[i + 1];
+ sourceAligned[sourceIndex] = true;
+ targetAligned[targetIndex] = true;
+ if (useAlignments) {
+ result.add(
+ "T:" +
+ Vocabulary.word(sourceWords[sourceIndex]) +
+ SEPARATOR +
+ Vocabulary.word(targetWords[targetIndex]));
+ }
+ }
+
+ // deletions: unaligned source words
+ if (useDeletions) {
+ for (int i = 0; i < sourceAligned.length; i++) {
+ if (!sourceAligned[i] && !Vocabulary.nt(sourceWords[i])) {
+ result.add("D:" + Vocabulary.word(sourceWords[i]));
+ }
+ }
+ }
+
+ // insertions: unaligned target words
+ if (useInsertions) {
+ for (int i = 0; i < targetAligned.length; i++) {
+ if (useInsertions && !targetAligned[i] && !Vocabulary.nt(targetWords[i])) {
+ result.add("I:" + Vocabulary.word(targetWords[i]));
+ }
+ }
+ }
+
+ return result;
+ }
+}