You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:27:02 UTC
[46/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it
possible to use Maven to build Joshua
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/Chart.java b/src/joshua/decoder/chart_parser/Chart.java
deleted file mode 100644
index b10c013..0000000
--- a/src/joshua/decoder/chart_parser/Chart.java
+++ /dev/null
@@ -1,748 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.Decoder;
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.chart_parser.CubePruneState;
-import joshua.decoder.chart_parser.DotChart.DotNode;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.SourceDependentFF;
-import joshua.decoder.ff.tm.AbstractGrammar;
-import joshua.decoder.ff.tm.Grammar;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.RuleCollection;
-import joshua.decoder.ff.tm.Trie;
-import joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.hypergraph.HyperGraph;
-import joshua.decoder.segment_file.Sentence;
-import joshua.decoder.segment_file.Token;
-import joshua.lattice.Arc;
-import joshua.lattice.Lattice;
-import joshua.lattice.Node;
-import joshua.util.ChartSpan;
-
-/**
- * Chart class this class implements chart-parsing: (1) seeding the chart (2)
- * cky main loop over bins, (3) identify applicable rules in each bin
- *
- * Note: the combination operation will be done in Cell
- *
- * Signatures of class: Cell: i, j SuperNode (used for CKY check): i,j, lhs
- * HGNode ("or" node): i,j, lhs, edge ngrams HyperEdge ("and" node)
- *
- * index of sentences: start from zero index of cell: cell (i,j) represent span
- * of words indexed [i,j-1] where i is in [0,n-1] and j is in [1,n]
- *
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- */
-
-public class Chart {
-
- private final JoshuaConfiguration config;
- // ===========================================================
- // Statistics
- // ===========================================================
-
- /**
- * how many items have been pruned away because its cost is greater than the
- * cutoff in calling chart.add_deduction_in_chart()
- */
- int nMerged = 0;
- int nAdded = 0;
- int nDotitemAdded = 0; // note: there is no pruning in dot-item
-
- public Sentence getSentence() {
- return this.sentence;
- }
-
- // ===============================================================
- // Private instance fields (maybe could be protected instead)
- // ===============================================================
- private ChartSpan<Cell> cells; // note that in some cell, it might be null
- private int sourceLength;
- private List<FeatureFunction> featureFunctions;
- private Grammar[] grammars;
- private DotChart[] dotcharts; // each grammar should have a dotchart associated with it
- private Cell goalBin;
- private int goalSymbolID = -1;
- private Lattice<Token> inputLattice;
-
- private Sentence sentence = null;
-// private SyntaxTree parseTree;
-// private ManualConstraintsHandler manualConstraintsHandler;
- private StateConstraint stateConstraint;
-
- private static final Logger logger = Logger.getLogger(Chart.class.getName());
-
- // ===============================================================
- // Constructors
- // ===============================================================
-
- /*
- * TODO: Once the Segment interface is adjusted to provide a Lattice<String>
- * for the sentence() method, we should just accept a Segment instead of the
- * sentence, segmentID, and constraintSpans parameters. We have the symbol
- * table already, so we can do the integerization here instead of in
- * DecoderThread. GrammarFactory.getGrammarForSentence will want the
- * integerized sentence as well, but then we'll need to adjust that interface
- * to deal with (non-trivial) lattices too. Of course, we get passed the
- * grammars too so we could move all of that into here.
- */
-
- public Chart(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars,
- String goalSymbol, JoshuaConfiguration config) {
- this.config = config;
- this.inputLattice = sentence.getLattice();
- this.sourceLength = inputLattice.size() - 1;
- this.featureFunctions = featureFunctions;
-
- this.sentence = sentence;
-
- // TODO: OOV handling no longer handles parse tree input (removed after
- // commit 748eb69714b26dd67cba8e7c25a294347603bede)
-// this.parseTree = null;
-// if (sentence instanceof ParsedSentence)
-// this.parseTree = ((ParsedSentence) sentence).syntaxTree();
-//
- this.cells = new ChartSpan<Cell>(sourceLength, null);
-
- this.goalSymbolID = Vocabulary.id(goalSymbol);
- this.goalBin = new Cell(this, this.goalSymbolID);
-
- /* Create the grammars, leaving space for the OOV grammar. */
- this.grammars = new Grammar[grammars.length + 1];
- for (int i = 0; i < grammars.length; i++)
- this.grammars[i + 1] = grammars[i];
-
- MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config);
- AbstractGrammar.addOOVRules(oovGrammar, sentence.getLattice(), featureFunctions,
- this.config.true_oovs_only);
- this.grammars[0] = oovGrammar;
-
- // each grammar will have a dot chart
- this.dotcharts = new DotChart[this.grammars.length];
- for (int i = 0; i < this.grammars.length; i++)
- this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this,
- this.grammars[i].isRegexpGrammar());
-
- // Begin to do initialization work
-
-// manualConstraintsHandler = new ManualConstraintsHandler(this, grammars[grammars.length - 1],
-// sentence.constraints());
-
- stateConstraint = null;
- if (sentence.target() != null)
- // stateConstraint = new StateConstraint(sentence.target());
- stateConstraint = new StateConstraint(Vocabulary.START_SYM + " " + sentence.target() + " "
- + Vocabulary.STOP_SYM);
-
- /* Find the SourceDependent feature and give it access to the sentence. */
- for (FeatureFunction ff : this.featureFunctions)
- if (ff instanceof SourceDependentFF)
- ((SourceDependentFF) ff).setSource(sentence);
-
- Decoder.LOG(2, "Finished seeding chart.");
- }
-
- /**
- * Manually set the goal symbol ID. The constructor expects a String
- * representing the goal symbol, but there may be time (say, for example, in
- * the second pass of a synchronous parse) where we want to set the goal
- * symbol to a particular ID (regardless of String representation).
- * <p>
- * This method should be called before expanding the chart, as chart expansion
- * depends on the goal symbol ID.
- *
- * @param i the id of the goal symbol to use
- */
- public void setGoalSymbolID(int i) {
- this.goalSymbolID = i;
- this.goalBin = new Cell(this, i);
- return;
- }
-
- // ===============================================================
- // The primary method for filling in the chart
- // ===============================================================
-
- /**
- * Construct the hypergraph with the help from DotChart using cube pruning.
- * Cube pruning occurs at the span level, with all completed rules from the
- * dot chart competing against each other; that is, rules with different
- * source sides *and* rules sharing a source side but with different target
- * sides are all in competition with each other.
- *
- * Terminal rules are added to the chart directly.
- *
- * Rules with nonterminals are added to the list of candidates. The candidates
- * list is seeded with the list of all rules and, for each nonterminal in the
- * rule, the 1-best tail node for that nonterminal and subspan. If the maximum
- * arity of a rule is R, then the dimension of the hypercube is R + 1, since
- * the first dimension is used to record the rule.
- */
- private void completeSpan(int i, int j) {
-
- /* STEP 1: create the heap, and seed it with all of the candidate states */
- PriorityQueue<CubePruneState> candidates = new PriorityQueue<CubePruneState>();
-
- /*
- * Look at all the grammars, seeding the chart with completed rules from the
- * DotChart
- */
- for (int g = 0; g < grammars.length; g++) {
- if (!grammars[g].hasRuleForSpan(i, j, inputLattice.distance(i, j))
- || null == dotcharts[g].getDotCell(i, j))
- continue;
-
- // for each rule with applicable rules
- for (DotNode dotNode : dotcharts[g].getDotCell(i, j).getDotNodes()) {
- RuleCollection ruleCollection = dotNode.getRuleCollection();
- if (ruleCollection == null)
- continue;
-
- List<Rule> rules = ruleCollection.getSortedRules(this.featureFunctions);
- SourcePath sourcePath = dotNode.getSourcePath();
-
- if (null == rules || rules.size() == 0)
- continue;
-
- if (ruleCollection.getArity() == 0) {
- /*
- * The total number of arity-0 items (pre-terminal rules) that we add
- * is controlled by num_translation_options in the configuration.
- *
- * We limit the translation options per DotNode; that is, per LHS.
- */
- int numTranslationsAdded = 0;
-
- /* Terminal productions are added directly to the chart */
- for (Rule rule : rules) {
-
- if (config.num_translation_options > 0
- && numTranslationsAdded >= config.num_translation_options) {
- break;
- }
-
- ComputeNodeResult result = new ComputeNodeResult(this.featureFunctions, rule, null, i,
- j, sourcePath, this.sentence);
-
- if (stateConstraint == null || stateConstraint.isLegal(result.getDPStates())) {
- getCell(i, j).addHyperEdgeInCell(result, rule, i, j, null, sourcePath, true);
- numTranslationsAdded++;
- }
- }
- } else {
- /* Productions with rank > 0 are subject to cube pruning */
-
- Rule bestRule = rules.get(0);
-
- List<HGNode> currentTailNodes = new ArrayList<HGNode>();
- List<SuperNode> superNodes = dotNode.getAntSuperNodes();
- for (SuperNode si : superNodes) {
- currentTailNodes.add(si.nodes.get(0));
- }
-
- /*
- * `ranks` records the current position in the cube. the 0th index is
- * the rule, and the remaining indices 1..N correspond to the tail
- * nodes (= nonterminals in the rule). These tail nodes are
- * represented by SuperNodes, which group together items with the same
- * nonterminal but different DP state (e.g., language model state)
- */
- int[] ranks = new int[1 + superNodes.size()];
- Arrays.fill(ranks, 1);
-
- ComputeNodeResult result = new ComputeNodeResult(featureFunctions, bestRule,
- currentTailNodes, i, j, sourcePath, sentence);
- CubePruneState bestState = new CubePruneState(result, ranks, rules, currentTailNodes,
- dotNode);
- candidates.add(bestState);
- }
- }
- }
-
- applyCubePruning(i, j, candidates);
- }
-
- /**
- * Applies cube pruning over a span.
- *
- * @param i
- * @param j
- * @param stateConstraint
- * @param candidates
- */
- private void applyCubePruning(int i, int j, PriorityQueue<CubePruneState> candidates) {
-
- // System.err.println(String.format("CUBEPRUNE: %d-%d with %d candidates",
- // i, j, candidates.size()));
- // for (CubePruneState cand: candidates) {
- // System.err.println(String.format(" CAND " + cand));
- // }
-
- /*
- * There are multiple ways to reach each point in the cube, so short-circuit
- * that.
- */
- HashSet<CubePruneState> visitedStates = new HashSet<CubePruneState>();
-
- int popLimit = config.pop_limit;
- int popCount = 0;
- while (candidates.size() > 0 && ((++popCount <= popLimit) || popLimit == 0)) {
- CubePruneState state = candidates.poll();
-
- DotNode dotNode = state.getDotNode();
- List<Rule> rules = state.rules;
- SourcePath sourcePath = dotNode.getSourcePath();
- List<SuperNode> superNodes = dotNode.getAntSuperNodes();
-
- /*
- * Add the hypothesis to the chart. This can only happen if (a) we're not
- * doing constrained decoding or (b) we are and the state is legal.
- */
- if (stateConstraint == null || stateConstraint.isLegal(state.getDPStates())) {
- getCell(i, j).addHyperEdgeInCell(state.computeNodeResult, state.getRule(), i, j,
- state.antNodes, sourcePath, true);
- }
-
- /*
- * Expand the hypothesis by walking down a step along each dimension of
- * the cube, in turn. k = 0 means we extend the rule being used; k > 0
- * expands the corresponding tail node.
- */
-
- for (int k = 0; k < state.ranks.length; k++) {
-
- /* Copy the current ranks, then extend the one we're looking at. */
- int[] nextRanks = new int[state.ranks.length];
- System.arraycopy(state.ranks, 0, nextRanks, 0, state.ranks.length);
- nextRanks[k]++;
-
- /*
- * We might have reached the end of something (list of rules or tail
- * nodes)
- */
- if (k == 0
- && (nextRanks[k] > rules.size() || (config.num_translation_options > 0 && nextRanks[k] > config.num_translation_options)))
- continue;
- else if ((k != 0 && nextRanks[k] > superNodes.get(k - 1).nodes.size()))
- continue;
-
- /* Use the updated ranks to assign the next rule and tail node. */
- Rule nextRule = rules.get(nextRanks[0] - 1);
- // HGNode[] nextAntNodes = new HGNode[state.antNodes.size()];
- List<HGNode> nextAntNodes = new ArrayList<HGNode>();
- for (int x = 0; x < state.ranks.length - 1; x++)
- nextAntNodes.add(superNodes.get(x).nodes.get(nextRanks[x + 1] - 1));
-
- /* Create the next state. */
- CubePruneState nextState = new CubePruneState(new ComputeNodeResult(featureFunctions,
- nextRule, nextAntNodes, i, j, sourcePath, this.sentence), nextRanks, rules,
- nextAntNodes, dotNode);
-
- /* Skip states that have been explored before. */
- if (visitedStates.contains(nextState))
- continue;
-
- visitedStates.add(nextState);
- candidates.add(nextState);
- }
- }
- }
-
- /* Create a priority queue of candidates for each span under consideration */
- private PriorityQueue<CubePruneState>[] allCandidates;
-
- private ArrayList<SuperNode> nodeStack;
-
- /**
- * Translates the sentence using the CKY+ variation proposed in
- * "A CYK+ Variant for SCFG Decoding Without A Dot Chart" (Sennrich, SSST
- * 2014).
- */
- private int i = -1;
-
- public HyperGraph expandSansDotChart() {
- for (i = sourceLength - 1; i >= 0; i--) {
- allCandidates = new PriorityQueue[sourceLength - i + 2];
- for (int id = 0; id < allCandidates.length; id++)
- allCandidates[id] = new PriorityQueue<CubePruneState>();
-
- nodeStack = new ArrayList<SuperNode>();
-
- for (int j = i + 1; j <= sourceLength; j++) {
- if (!sentence.hasPath(i, j))
- continue;
-
- for (int g = 0; g < this.grammars.length; g++) {
- // System.err.println(String.format("\n*** I=%d J=%d GRAMMAR=%d", i, j, g));
-
- if (j == i + 1) {
- /* Handle terminals */
- Node<Token> node = sentence.getNode(i);
- for (Arc<Token> arc : node.getOutgoingArcs()) {
- int word = arc.getLabel().getWord();
- // disallow lattice decoding for now
- assert arc.getHead().id() == j;
- Trie trie = this.grammars[g].getTrieRoot().match(word);
- if (trie != null && trie.hasRules())
- addToChart(trie, j, false);
- }
- } else {
- /* Recurse for non-terminal case */
- consume(this.grammars[g].getTrieRoot(), i, j - 1);
- }
- }
-
- // Now that we've accumulated all the candidates, apply cube pruning
- applyCubePruning(i, j, allCandidates[j - i]);
-
- // Add unary nodes
- addUnaryNodes(this.grammars, i, j);
- }
- }
-
- // transition_final: setup a goal item, which may have many deductions
- if (null == this.cells.get(0, sourceLength)
- || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
- this.sourceLength)) {
- Decoder.LOG(1, String.format("Input %d: Parse failure (either no derivations exist or pruning is too aggressive",
- sentence.id()));
- return null;
- }
-
- return new HyperGraph(this.goalBin.getSortedNodes().get(0), -1, -1, this.sentence);
- }
-
- /**
- * Recursively consumes the trie, following input nodes, finding applicable
- * rules and adding them to bins for each span for later cube pruning.
- *
- * @param dotNode data structure containing information about what's been
- * already matched
- * @param l extension point we're looking at
- *
- */
- private void consume(Trie trie, int j, int l) {
- /*
- * 1. If the trie node has any rules, we can add them to the collection
- *
- * 2. Next, look at all the outgoing nonterminal arcs of the trie node. If
- * any of them match an existing chart item, then we know we can extend
- * (i,j) to (i,l). We then recurse for all m from l+1 to n (the end of the
- * sentence)
- *
- * 3. We also try to match terminals if (j + 1 == l)
- */
-
- // System.err.println(String.format("CONSUME %s / %d %d %d", dotNode,
- // dotNode.begin(), dotNode.end(), l));
-
- // Try to match terminals
- if (inputLattice.distance(j, l) == 1) {
- // Get the current sentence node, and explore all outgoing arcs, since we
- // might be decoding
- // a lattice. For sentence decoding, this is trivial: there is only one
- // outgoing arc.
- Node<Token> inputNode = sentence.getNode(j);
- for (Arc<Token> arc : inputNode.getOutgoingArcs()) {
- int word = arc.getLabel().getWord();
- Trie nextTrie;
- if ((nextTrie = trie.match(word)) != null) {
- // add to chart item over (i, l)
- addToChart(nextTrie, arc.getHead().id(), i == j);
- }
- }
- }
-
- // Now try to match nonterminals
- Cell cell = cells.get(j, l);
- if (cell != null) {
- for (int id : cell.getKeySet()) { // for each supernode (lhs), see if you
- // can match a trie
- Trie nextTrie = trie.match(id);
- if (nextTrie != null) {
- SuperNode superNode = cell.getSuperNode(id);
- nodeStack.add(superNode);
- addToChart(nextTrie, superNode.end(), i == j);
- nodeStack.remove(nodeStack.size() - 1);
- }
- }
- }
- }
-
- /**
- * Adds all rules at a trie node to the chart, unless its a unary rule. A
- * unary rule is the first outgoing arc of a grammar's root trie. For
- * terminals, these are added during the seeding stage; for nonterminals,
- * these confuse cube pruning and can result in infinite loops, and are
- * handled separately (see addUnaryNodes());
- *
- * @param trie the grammar node
- * @param isUnary whether the rules at this dotnode are unary
- */
- private void addToChart(Trie trie, int j, boolean isUnary) {
-
- // System.err.println(String.format("ADD TO CHART %s unary=%s", dotNode,
- // isUnary));
-
- if (!isUnary && trie.hasRules()) {
- DotNode dotNode = new DotNode(i, j, trie, new ArrayList<SuperNode>(nodeStack), null);
-
- addToCandidates(dotNode);
- }
-
- for (int l = j + 1; l <= sentence.length(); l++)
- consume(trie, j, l);
- }
-
- /**
- * Record the completed rule with backpointers for later cube-pruning.
- *
- * @param width
- * @param rules
- * @param tailNodes
- */
- private void addToCandidates(DotNode dotNode) {
- // System.err.println(String.format("ADD TO CANDIDATES %s AT INDEX %d",
- // dotNode, dotNode.end() - dotNode.begin()));
-
- // TODO: one entry per rule, or per rule instantiation (rule together with
- // unique matching of input)?
- List<Rule> rules = dotNode.getRuleCollection().getSortedRules(featureFunctions);
- Rule bestRule = rules.get(0);
- List<SuperNode> superNodes = dotNode.getAntSuperNodes();
-
- List<HGNode> tailNodes = new ArrayList<HGNode>();
- for (SuperNode superNode : superNodes)
- tailNodes.add(superNode.nodes.get(0));
-
- int[] ranks = new int[1 + superNodes.size()];
- Arrays.fill(ranks, 1);
-
- ComputeNodeResult result = new ComputeNodeResult(featureFunctions, bestRule, tailNodes,
- dotNode.begin(), dotNode.end(), dotNode.getSourcePath(), sentence);
- CubePruneState seedState = new CubePruneState(result, ranks, rules, tailNodes, dotNode);
-
- allCandidates[dotNode.end() - dotNode.begin()].add(seedState);
- }
-
- /**
- * This function performs the main work of decoding.
- *
- * @return the hypergraph containing the translated sentence.
- */
- public HyperGraph expand() {
-
- for (int width = 1; width <= sourceLength; width++) {
- for (int i = 0; i <= sourceLength - width; i++) {
- int j = i + width;
- if (logger.isLoggable(Level.FINEST))
- logger.finest(String.format("Processing span (%d, %d)", i, j));
-
- /* Skips spans for which no path exists (possible in lattices). */
- if (inputLattice.distance(i, j) == Float.POSITIVE_INFINITY) {
- continue;
- }
-
- /*
- * 1. Expand the dot through all rules. This is a matter of (a) look for
- * rules over (i,j-1) that need the terminal at (j-1,j) and looking at
- * all split points k to expand nonterminals.
- */
- logger.finest("Expanding cell");
- for (int k = 0; k < this.grammars.length; k++) {
- /**
- * Each dotChart can act individually (without consulting other
- * dotCharts) because it either consumes the source input or the
- * complete nonTerminals, which are both grammar-independent.
- **/
- this.dotcharts[k].expandDotCell(i, j);
- }
-
- /*
- * 2. The regular CKY part: add completed items onto the chart via cube
- * pruning.
- */
- logger.finest("Adding complete items into chart");
- completeSpan(i, j);
-
- /* 3. Process unary rules. */
- logger.finest("Adding unary items into chart");
- addUnaryNodes(this.grammars, i, j);
-
- // (4)=== in dot_cell(i,j), add dot-nodes that start from the /complete/
- // superIterms in
- // chart_cell(i,j)
- logger.finest("Initializing new dot-items that start from complete items in this cell");
- for (int k = 0; k < this.grammars.length; k++) {
- if (this.grammars[k].hasRuleForSpan(i, j, inputLattice.distance(i, j))) {
- this.dotcharts[k].startDotItems(i, j);
- }
- }
-
- /*
- * 5. Sort the nodes in the cell.
- *
- * Sort the nodes in this span, to make them usable for future
- * applications of cube pruning.
- */
- if (null != this.cells.get(i, j)) {
- this.cells.get(i, j).getSortedNodes();
- }
- }
- }
-
- logStatistics(Level.INFO);
-
- // transition_final: setup a goal item, which may have many deductions
- if (null == this.cells.get(0, sourceLength)
- || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
- this.sourceLength)) {
- Decoder.LOG(1, String.format("Input %d: Parse failure (either no derivations exist or pruning is too aggressive",
- sentence.id()));
- return null;
- }
-
- logger.fine("Finished expand");
- return new HyperGraph(this.goalBin.getSortedNodes().get(0), -1, -1, this.sentence);
- }
-
- /**
- * Get the requested cell, creating the entry if it doesn't already exist.
- *
- * @param i span start
- * @param j span end
- * @return the cell item
- */
- public Cell getCell(int i, int j) {
- assert i >= 0;
- assert i <= sentence.length();
- assert i <= j;
- if (cells.get(i, j) == null)
- cells.set(i, j, new Cell(this, goalSymbolID));
-
- return cells.get(i, j);
- }
-
- // ===============================================================
- // Private methods
- // ===============================================================
-
- private void logStatistics(Level level) {
- Decoder.LOG(2, String.format("Input %d: Chart: added %d merged %d dot-items added: %d",
- this.sentence.id(), this.nAdded, this.nMerged, this.nDotitemAdded));
- }
-
- /**
- * Handles expansion of unary rules. Rules are expanded in an agenda-based
- * manner to avoid constructing infinite unary chains. Assumes a triangle
- * inequality of unary rule expansion (e.g., A -> B will always be cheaper
- * than A -> C -> B), which is not a true assumption.
- *
- * @param grammars A list of the grammars for the sentence
- * @param i
- * @param j
- * @return the number of nodes added
- */
- private int addUnaryNodes(Grammar[] grammars, int i, int j) {
-
- Cell chartBin = this.cells.get(i, j);
- if (null == chartBin) {
- return 0;
- }
- int qtyAdditionsToQueue = 0;
- ArrayList<HGNode> queue = new ArrayList<HGNode>(chartBin.getSortedNodes());
- HashSet<Integer> seen_lhs = new HashSet<Integer>();
-
- if (logger.isLoggable(Level.FINEST))
- logger.finest("Adding unary to [" + i + ", " + j + "]");
-
- while (queue.size() > 0) {
- HGNode node = queue.remove(0);
- seen_lhs.add(node.lhs);
-
- for (Grammar gr : grammars) {
- if (!gr.hasRuleForSpan(i, j, inputLattice.distance(i, j)))
- continue;
-
- /*
- * Match against the node's LHS, and then make sure the rule collection
- * has unary rules
- */
- Trie childNode = gr.getTrieRoot().match(node.lhs);
- if (childNode != null && childNode.getRuleCollection() != null
- && childNode.getRuleCollection().getArity() == 1) {
-
- ArrayList<HGNode> antecedents = new ArrayList<HGNode>();
- antecedents.add(node);
-
- List<Rule> rules = childNode.getRuleCollection().getSortedRules(this.featureFunctions);
- for (Rule rule : rules) { // for each unary rules
-
- ComputeNodeResult states = new ComputeNodeResult(this.featureFunctions, rule,
- antecedents, i, j, new SourcePath(), this.sentence);
- HGNode resNode = chartBin.addHyperEdgeInCell(states, rule, i, j, antecedents,
- new SourcePath(), true);
-
- if (logger.isLoggable(Level.FINEST))
- logger.finest(rule.toString());
-
- if (null != resNode && !seen_lhs.contains(resNode.lhs)) {
- queue.add(resNode);
- qtyAdditionsToQueue++;
- }
- }
- }
- }
- }
- return qtyAdditionsToQueue;
- }
-
- /***
- * Add a terminal production (X -> english phrase) to the hypergraph.
- *
- * @param i the start index
- * @param j stop index
- * @param rule the terminal rule applied
- * @param srcPath the source path cost
- */
- public void addAxiom(int i, int j, Rule rule, SourcePath srcPath) {
- if (null == this.cells.get(i, j)) {
- this.cells.set(i, j, new Cell(this, this.goalSymbolID));
- }
-
- this.cells.get(i, j).addHyperEdgeInCell(
- new ComputeNodeResult(this.featureFunctions, rule, null, i, j, srcPath, sentence), rule, i,
- j, null, srcPath, false);
-
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/joshua/decoder/chart_parser/ComputeNodeResult.java
deleted file mode 100644
index 373ed40..0000000
--- a/src/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-
-import java.util.List;
-
-import joshua.decoder.Decoder;
-import joshua.decoder.ff.StatefulFF;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.hypergraph.HyperEdge;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * This class computes the cost of applying a rule.
- *
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
- */
-
-public class ComputeNodeResult {
-
- // The cost incurred by the rule itself (and all associated feature functions)
- private float transitionCost;
-
- // transitionCost + the Viterbi costs of the tail nodes.
- private float viterbiCost;
-
- // viterbiCost + a future estimate (outside cost estimate).
- private float pruningCostEstimate;
-
- // The StateComputer objects themselves serve as keys.
- private List<DPState> dpStates;
-
- /**
- * Computes the new state(s) that are produced when applying the given rule to the list of tail
- * nodes. Also computes a range of costs of doing so (the transition cost, the total (Viterbi)
- * cost, and a score that includes a future cost estimate).
- *
- * Old version that doesn't use the derivation state.
- */
- public ComputeNodeResult(List<FeatureFunction> featureFunctions, Rule rule, List<HGNode> tailNodes,
- int i, int j, SourcePath sourcePath, Sentence sentence) {
-
- // The total Viterbi cost of this edge. This is the Viterbi cost of the tail nodes, plus
- // whatever costs we incur applying this rule to create a new hyperedge.
- float viterbiCost = 0.0f;
-
- if (Decoder.VERBOSE >= 4) {
- System.err.println("ComputeNodeResult():");
- System.err.println("-> RULE " + rule);
- }
-
- /*
- * Here we sum the accumulated cost of each of the tail nodes. The total cost of the new
- * hyperedge (the inside or Viterbi cost) is the sum of these nodes plus the cost of the
- * transition. Note that this could and should all be generalized to whatever semiring is being
- * used.
- */
- if (null != tailNodes) {
- for (HGNode item : tailNodes) {
- if (Decoder.VERBOSE >= 4) {
- System.err.println(" -> item.bestedge: " + item);
- System.err.println("-> TAIL NODE " + item);
- }
- viterbiCost += item.bestHyperedge.getBestDerivationScore();
- }
- }
-
- List<DPState> allDPStates = new ArrayList<DPState>();
-
- // The transition cost is the new cost incurred by applying this rule
- float transitionCost = 0.0f;
-
- // The future cost estimate is a heuristic estimate of the outside cost of this edge.
- float futureCostEstimate = 0.0f;
-
- /*
- * We now iterate over all the feature functions, computing their cost and their expected future
- * cost.
- */
- for (FeatureFunction feature : featureFunctions) {
- FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator();
-
- DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
- transitionCost += acc.getScore();
-
- if (Decoder.VERBOSE >= 4)
- System.err.println(String.format("-> FEATURE %s = %.3f * %.3f = %.3f",
- feature.getName(), acc.getScore() / Decoder.weights.getSparse(feature.getName()),
- Decoder.weights.getSparse(feature.getName()), acc.getScore()));
-
- if (feature.isStateful()) {
- futureCostEstimate += feature.estimateFutureCost(rule, newState, sentence);
- allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
- }
- }
-
- viterbiCost += transitionCost;
-
- if (Decoder.VERBOSE >= 4)
- System.err.println(String.format("-> COST = %.3f", transitionCost));
-
- // Set the final results.
- this.pruningCostEstimate = viterbiCost + futureCostEstimate;
- this.viterbiCost = viterbiCost;
- this.transitionCost = transitionCost;
- this.dpStates = allDPStates;
- }
-
- /**
- * This is called from Cell.java when making the final transition to the goal state.
- * This is done to allow feature functions to correct for partial estimates, since
- * they now have the knowledge that the whole sentence is complete. Basically, this
- * is only used by LanguageModelFF, which does not score partial n-grams, and therefore
- * needs to correct for this when a short sentence ends. KenLMFF corrects for this by
- * always scoring partial hypotheses, and subtracting off the partial score when longer
- * context is available. This would be good to do for the LanguageModelFF feature function,
- * too: it makes search better (more accurate at the beginning, for example), and would
- * also do away with the need for the computeFinal* class of functions (and hooks in
- * the feature function interface).
- */
- public static float computeFinalCost(List<FeatureFunction> featureFunctions,
- List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, Sentence sentence) {
-
- float cost = 0;
- for (FeatureFunction ff : featureFunctions) {
- cost += ff.computeFinalCost(tailNodes.get(0), i, j, sourcePath, sentence);
- }
- return cost;
- }
-
- public static FeatureVector computeTransitionFeatures(List<FeatureFunction> featureFunctions,
- HyperEdge edge, int i, int j, Sentence sentence) {
-
- // Initialize the set of features with those that were present with the rule in the grammar.
- FeatureVector featureDelta = new FeatureVector();
-
- // === compute feature logPs
- for (FeatureFunction ff : featureFunctions) {
- // A null rule signifies the final transition.
- if (edge.getRule() == null)
- featureDelta.add(ff.computeFinalFeatures(edge.getTailNodes().get(0), i, j, edge.getSourcePath(), sentence));
- else {
- featureDelta.add(ff.computeFeatures(edge.getRule(), edge.getTailNodes(), i, j, edge.getSourcePath(), sentence));
- }
- }
-
- return featureDelta;
- }
-
- public float getPruningEstimate() {
- return this.pruningCostEstimate;
- }
-
- /**
- * The complete cost of the Viterbi derivation at this point
- */
- public float getViterbiCost() {
- return this.viterbiCost;
- }
-
- public float getBaseCost() {
- return getViterbiCost() - getTransitionCost();
- }
-
- /**
- * The cost incurred by this edge alone
- *
- * @return
- */
- public float getTransitionCost() {
- return this.transitionCost;
- }
-
- public List<DPState> getDPStates() {
- return this.dpStates;
- }
-
- public void printInfo() {
- System.out.println("scores: " + transitionCost + "; " + viterbiCost + "; "
- + pruningCostEstimate);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/CubePruneState.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/CubePruneState.java b/src/joshua/decoder/chart_parser/CubePruneState.java
deleted file mode 100644
index c9ee8e6..0000000
--- a/src/joshua/decoder/chart_parser/CubePruneState.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.chart_parser.DotChart.DotNode;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-
-// ===============================================================
-// CubePruneState class
-// ===============================================================
-public class CubePruneState implements Comparable<CubePruneState> {
- int[] ranks;
- ComputeNodeResult computeNodeResult;
- List<HGNode> antNodes;
- List<Rule> rules;
- private DotNode dotNode;
-
- public CubePruneState(ComputeNodeResult score, int[] ranks, List<Rule> rules, List<HGNode> antecedents, DotNode dotNode) {
- this.computeNodeResult = score;
- this.ranks = ranks;
- this.rules = rules;
- // create a new vector is critical, because currentAntecedents will change later
- this.antNodes = new ArrayList<HGNode>(antecedents);
- this.dotNode = dotNode;
- }
-
- /**
- * This returns the list of DP states associated with the result.
- *
- * @return
- */
- List<DPState> getDPStates() {
- return this.computeNodeResult.getDPStates();
- }
-
- Rule getRule() {
- return this.rules.get(this.ranks[0]-1);
- }
-
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("STATE ||| rule=" + getRule() + " inside cost = " + computeNodeResult.getViterbiCost()
- + " estimate = " + computeNodeResult.getPruningEstimate());
- return sb.toString();
- }
-
- public void setDotNode(DotNode node) {
- this.dotNode = node;
- }
-
- public DotNode getDotNode() {
- return this.dotNode;
- }
-
- public boolean equals(Object obj) {
- if (obj == null)
- return false;
- if (!this.getClass().equals(obj.getClass()))
- return false;
- CubePruneState state = (CubePruneState) obj;
- if (state.ranks.length != ranks.length)
- return false;
- for (int i = 0; i < ranks.length; i++)
- if (state.ranks[i] != ranks[i])
- return false;
- if (getDotNode() != state.getDotNode())
- return false;
-
- return true;
- }
-
- public int hashCode() {
- int hash = (dotNode != null) ? dotNode.hashCode() : 0;
- hash += Arrays.hashCode(ranks);
-
- return hash;
- }
-
- /**
- * Compares states by ExpectedTotalLogP, allowing states to be sorted according to their inverse
- * order (high-prob first).
- */
- public int compareTo(CubePruneState another) {
- if (this.computeNodeResult.getPruningEstimate() < another.computeNodeResult
- .getPruningEstimate()) {
- return 1;
- } else if (this.computeNodeResult.getPruningEstimate() == another.computeNodeResult
- .getPruningEstimate()) {
- return 0;
- } else {
- return -1;
- }
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/DotChart.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/DotChart.java b/src/joshua/decoder/chart_parser/DotChart.java
deleted file mode 100644
index b82b68c..0000000
--- a/src/joshua/decoder/chart_parser/DotChart.java
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.Grammar;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.RuleCollection;
-import joshua.decoder.ff.tm.Trie;
-import joshua.decoder.segment_file.Token;
-import joshua.lattice.Arc;
-import joshua.lattice.Lattice;
-import joshua.lattice.Node;
-import joshua.util.ChartSpan;
-
-/**
- * The DotChart handles Earley-style implicit binarization of translation rules.
- *
- * The {@link DotNode} object represents the (possibly partial) application of a synchronous rule.
- * The implicit binarization is maintained with a pointer to the {@link Trie} node in the grammar,
- * for easy retrieval of the next symbol to be matched. At every span (i,j) of the input sentence,
- * every incomplete DotNode is examined to see whether it (a) needs a terminal and matches against
- * the final terminal of the span or (b) needs a nonterminal and matches against a completed
- * nonterminal in the main chart at some split point (k,j).
- *
- * Once a rule is completed, it is entered into the {@link DotChart}. {@link DotCell} objects are
- * used to group completed DotNodes over a span.
- *
- * There is a separate DotChart for every grammar.
- *
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Kristy Hollingshead Seitz
- */
-class DotChart {
-
- // ===============================================================
- // Package-protected instance fields
- // ===============================================================
- /**
- * Two-dimensional chart of cells. Some cells might be null. This could definitely be represented
- * more efficiently, since only the upper half of this triangle is every used.
- */
- private ChartSpan<DotCell> dotcells;
-
- public DotCell getDotCell(int i, int j) {
- return dotcells.get(i, j);
- }
-
- // ===============================================================
- // Private instance fields (maybe could be protected instead)
- // ===============================================================
-
- /**
- * CKY+ style parse chart in which completed span entries are stored.
- */
- private Chart dotChart;
-
- /**
- * Translation grammar which contains the translation rules.
- */
- private Grammar pGrammar;
-
- /* Length of input sentence. */
- private final int sentLen;
-
- /* Represents the input sentence being translated. */
- private final Lattice<Token> input;
-
- /* If enabled, rule terminals are treated as regular expressions. */
- private final boolean regexpMatching;
-
- // ===============================================================
- // Static fields
- // ===============================================================
-
- private static final Logger logger = Logger.getLogger(DotChart.class.getName());
-
- // ===============================================================
- // Constructors
- // ===============================================================
-
- // TODO: Maybe this should be a non-static inner class of Chart. That would give us implicit
- // access to all the arguments of this constructor. Though we would need to take an argument, i,
- // to know which Chart.this.grammars[i] to use.
-
- /**
- * Constructs a new dot chart from a specified input lattice, a translation grammar, and a parse
- * chart.
- *
- * @param input A lattice which represents an input sentence.
- * @param grammar A translation grammar.
- * @param chart A CKY+ style chart in which completed span entries are stored.
- */
-
-
-
- public DotChart(Lattice<Token> input, Grammar grammar, Chart chart, boolean regExpMatching) {
-
- this.dotChart = chart;
- this.pGrammar = grammar;
- this.input = input;
- this.sentLen = input.size();
-
- this.dotcells = new ChartSpan<DotCell>(sentLen, null);
- this.regexpMatching = regExpMatching;
-
- seed();
- }
-
- /**
- * Add initial dot items: dot-items pointer to the root of the grammar trie.
- */
- void seed() {
- for (int j = 0; j <= sentLen - 1; j++) {
- if (pGrammar.hasRuleForSpan(j, j, input.distance(j, j))) {
- if (null == pGrammar.getTrieRoot()) {
- throw new RuntimeException("trie root is null");
- }
- addDotItem(pGrammar.getTrieRoot(), j, j, null, null, new SourcePath());
- }
- }
- }
-
- /**
- * This function computes all possible expansions of all rules over the provided span (i,j). By
- * expansions, we mean the moving of the dot forward (from left to right) over a nonterminal or
- * terminal symbol on the rule's source side.
- *
- * There are two kinds of expansions:
- *
- * <ol>
- * <li>Expansion over a nonterminal symbol. For this kind of expansion, a rule has a dot
- * immediately prior to a source-side nonterminal. The main Chart is consulted to see whether
- * there exists a completed nonterminal with the same label. If so, the dot is advanced.
- *
- * Discovering nonterminal expansions is a matter of enumerating all split points k such that i <
- * k and k < j. The nonterminal symbol must exist in the main Chart over (k,j).
- *
- * <li>Expansion over a terminal symbol. In this case, expansion is a simple matter of determing
- * whether the input symbol at position j (the end of the span) matches the next symbol in the
- * rule. This is equivalent to choosing a split point k = j - 1 and looking for terminal symbols
- * over (k,j). Note that phrases in the input rule are handled one-by-one as we consider longer
- * spans.
- * </ol>
- */
- void expandDotCell(int i, int j) {
- if (logger.isLoggable(Level.FINEST))
- logger.finest("Expanding dot cell (" + i + "," + j + ")");
-
- /*
- * (1) If the dot is just to the left of a non-terminal variable, we look for theorems or axioms
- * in the Chart that may apply and extend the dot position. We look for existing axioms over all
- * spans (k,j), i < k < j.
- */
- for (int k = i + 1; k < j; k++) {
- extendDotItemsWithProvedItems(i, k, j, false);
- }
-
- /*
- * (2) If the the dot-item is looking for a source-side terminal symbol, we simply match against
- * the input sentence and advance the dot.
- */
- Node<Token> node = input.getNode(j - 1);
- for (Arc<Token> arc : node.getOutgoingArcs()) {
-
- int last_word = arc.getLabel().getWord();
- int arc_len = arc.getHead().getNumber() - arc.getTail().getNumber();
-
- // int last_word=foreign_sent[j-1]; // input.getNode(j-1).getNumber(); //
-
- if (null != dotcells.get(i, j - 1)) {
- // dotitem in dot_bins[i][k]: looking for an item in the right to the dot
-
-
- for (DotNode dotNode : dotcells.get(i, j - 1).getDotNodes()) {
-
- // String arcWord = Vocabulary.word(last_word);
- // Assert.assertFalse(arcWord.endsWith("]"));
- // Assert.assertFalse(arcWord.startsWith("["));
- // logger.info("DotChart.expandDotCell: " + arcWord);
-
- // List<Trie> child_tnodes = ruleMatcher.produceMatchingChildTNodesTerminalevel(dotNode,
- // last_word);
-
- List<Trie> child_tnodes = null;
-
- if (this.regexpMatching) {
- child_tnodes = matchAll(dotNode, last_word);
- } else {
- Trie child_node = dotNode.trieNode.match(last_word);
- child_tnodes = Arrays.asList(child_node);
- }
-
- if (!(child_tnodes == null || child_tnodes.isEmpty())) {
- for (Trie child_tnode : child_tnodes) {
- if (null != child_tnode) {
- addDotItem(child_tnode, i, j - 1 + arc_len, dotNode.antSuperNodes, null,
- dotNode.srcPath.extend(arc));
- }
- }
- }
- }
- }
- }
- }
-
- /**
- * note: (i,j) is a non-terminal, this cannot be a cn-side terminal, which have been handled in
- * case2 of dotchart.expand_cell add dotitems that start with the complete super-items in
- * cell(i,j)
- */
- void startDotItems(int i, int j) {
- extendDotItemsWithProvedItems(i, i, j, true);
- }
-
- // ===============================================================
- // Private methods
- // ===============================================================
-
- /**
- * Attempt to combine an item in the dot chart with an item in the main chart to create a new item
- * in the dot chart. The DotChart item is a {@link DotNode} begun at position i with the dot
- * currently at position k, that is, a partially-applied rule.
- *
- * In other words, this method looks for (proved) theorems or axioms in the completed chart that
- * may apply and extend the dot position.
- *
- * @param i Start index of a dot chart item
- * @param k End index of a dot chart item; start index of a completed chart item
- * @param j End index of a completed chart item
- * @param skipUnary if true, don't extend unary rules
- */
- private void extendDotItemsWithProvedItems(int i, int k, int j, boolean skipUnary) {
- if (this.dotcells.get(i, k) == null || this.dotChart.getCell(k, j) == null) {
- return;
- }
-
- // complete super-items (items over the same span with different LHSs)
- List<SuperNode> superNodes = new ArrayList<SuperNode>(this.dotChart.getCell(k, j)
- .getSortedSuperItems().values());
-
- /* For every partially complete item over (i,k) */
- for (DotNode dotNode : dotcells.get(i, k).dotNodes) {
- /* For every completed nonterminal in the main chart */
- for (SuperNode superNode : superNodes) {
-
- // String arcWord = Vocabulary.word(superNode.lhs);
- // logger.info("DotChart.extendDotItemsWithProvedItems: " + arcWord);
- // Assert.assertTrue(arcWord.endsWith("]"));
- // Assert.assertTrue(arcWord.startsWith("["));
-
- /*
- * Regular Expression matching allows for a regular-expression style rules in the grammar,
- * which allows for a very primitive treatment of morphology. This is an advanced,
- * undocumented feature that introduces a complexity, in that the next "word" in the grammar
- * rule might match more than one outgoing arc in the grammar trie.
- */
- Trie child_node = dotNode.getTrieNode().match(superNode.lhs);
- if (child_node != null) {
- if ((!skipUnary) || (child_node.hasExtensions())) {
- addDotItem(child_node, i, j, dotNode.getAntSuperNodes(), superNode, dotNode
- .getSourcePath().extendNonTerminal());
- }
- }
- }
- }
- }
-
- /*
- * We introduced the ability to have regular expressions in rules for matching against terminals.
- * For example, you could have the rule
- *
- * <pre> [X] ||| l?s herman?s ||| siblings </pre>
- *
- * When this is enabled for a grammar, we need to test against *all* (positive) outgoing arcs of
- * the grammar trie node to see if any of them match, and then return the whole set. This is quite
- * expensive, which is why you should only enable regular expressions for small grammars.
- */
-
- private ArrayList<Trie> matchAll(DotNode dotNode, int wordID) {
- ArrayList<Trie> trieList = new ArrayList<>();
- HashMap<Integer, ? extends Trie> childrenTbl = dotNode.trieNode.getChildren();
-
- if (childrenTbl != null && wordID >= 0) {
- // get all the extensions, map to string, check for *, build regexp
- for (Map.Entry<Integer, ? extends Trie> entry : childrenTbl.entrySet()) {
- Integer arcID = entry.getKey();
- if (arcID == wordID) {
- trieList.add(entry.getValue());
- } else {
- String arcWord = Vocabulary.word(arcID);
- if (Vocabulary.word(wordID).matches(arcWord)) {
- trieList.add(entry.getValue());
- }
- }
- }
- }
- return trieList;
- }
-
-
- /**
- * Creates a {@link DotNode} and adds it into the {@link DotChart} at the correct place. These
- * are (possibly incomplete) rule applications.
- *
- * @param tnode the trie node pointing to the location ("dot") in the grammar trie
- * @param i
- * @param j
- * @param antSuperNodesIn the supernodes representing the rule's tail nodes
- * @param curSuperNode the lefthand side of the rule being created
- * @param srcPath the path taken through the input lattice
- */
- private void addDotItem(Trie tnode, int i, int j, ArrayList<SuperNode> antSuperNodesIn,
- SuperNode curSuperNode, SourcePath srcPath) {
- ArrayList<SuperNode> antSuperNodes = new ArrayList<SuperNode>();
- if (antSuperNodesIn != null) {
- antSuperNodes.addAll(antSuperNodesIn);
- }
- if (curSuperNode != null) {
- antSuperNodes.add(curSuperNode);
- }
-
- DotNode item = new DotNode(i, j, tnode, antSuperNodes, srcPath);
- if (dotcells.get(i, j) == null) {
- dotcells.set(i, j, new DotCell());
- }
- dotcells.get(i, j).addDotNode(item);
- dotChart.nDotitemAdded++;
-
- if (logger.isLoggable(Level.FINEST)) {
- logger.finest(String.format("Add a dotitem in cell (%d, %d), n_dotitem=%d, %s", i, j,
- dotChart.nDotitemAdded, srcPath));
-
- RuleCollection rules = tnode.getRuleCollection();
- if (rules != null) {
- for (Rule r : rules.getRules()) {
- // System.out.println("rule: "+r.toString());
- logger.finest(r.toString());
- }
- }
- }
- }
-
- // ===============================================================
- // Package-protected classes
- // ===============================================================
-
- /**
- * A DotCell groups together DotNodes that have been applied over a particular span. A DotNode, in
- * turn, is a partially-applied grammar rule, represented as a pointer into the grammar trie
- * structure.
- */
- static class DotCell {
-
- // Package-protected fields
- private List<DotNode> dotNodes = new ArrayList<DotNode>();
-
- public List<DotNode> getDotNodes() {
- return dotNodes;
- }
-
- private void addDotNode(DotNode dt) {
- /*
- * if(l_dot_items==null) l_dot_items= new ArrayList<DotItem>();
- */
- dotNodes.add(dt);
- }
- }
-
- /**
- * A DotNode represents the partial application of a rule rooted to a particular span (i,j). It
- * maintains a pointer to the trie node in the grammar for efficient mapping.
- */
- static class DotNode {
-
- private int i, j;
- private Trie trieNode = null;
-
- /* A list of grounded (over a span) nonterminals that have been crossed in traversing the rule */
- private ArrayList<SuperNode> antSuperNodes = null;
-
- /* The source lattice cost of applying the rule */
- private SourcePath srcPath;
-
- @Override
- public String toString() {
- int size = 0;
- if (trieNode != null && trieNode.getRuleCollection() != null)
- size = trieNode.getRuleCollection().getRules().size();
- return String.format("DOTNODE i=%d j=%d #rules=%d #tails=%d", i, j, size, antSuperNodes.size());
- }
-
- /**
- * Initialize a dot node with the span, grammar trie node, list of supernode tail pointers, and
- * the lattice sourcepath.
- *
- * @param i
- * @param j
- * @param trieNode
- * @param antSuperNodes
- * @param srcPath
- */
- public DotNode(int i, int j, Trie trieNode, ArrayList<SuperNode> antSuperNodes, SourcePath srcPath) {
- this.i = i;
- this.j = j;
- this.trieNode = trieNode;
- this.antSuperNodes = antSuperNodes;
- this.srcPath = srcPath;
- }
-
- public boolean equals(Object obj) {
- if (obj == null)
- return false;
- if (!this.getClass().equals(obj.getClass()))
- return false;
- DotNode state = (DotNode) obj;
-
- /*
- * Technically, we should be comparing the span inforamtion as well, but that would require us
- * to store it, increasing memory requirements, and we should be able to guarantee that we
- * won't be comparing DotNodes across spans.
- */
- // if (this.i != state.i || this.j != state.j)
- // return false;
-
- if (this.trieNode != state.trieNode)
- return false;
-
- return true;
- }
-
- /**
- * Technically the hash should include the span (i,j), but since DotNodes are grouped by span,
- * this isn't necessary, and we gain something by not having to store the span.
- */
- public int hashCode() {
- return this.trieNode.hashCode();
- }
-
- // convenience function
- public boolean hasRules() {
- return getTrieNode().getRuleCollection() != null && getTrieNode().getRuleCollection().getRules().size() != 0;
- }
-
- public RuleCollection getRuleCollection() {
- return getTrieNode().getRuleCollection();
- }
-
- public Trie getTrieNode() {
- return trieNode;
- }
-
- public SourcePath getSourcePath() {
- return srcPath;
- }
-
- public ArrayList<SuperNode> getAntSuperNodes() {
- return antSuperNodes;
- }
-
- public int begin() {
- return i;
- }
-
- public int end() {
- return j;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/ManualConstraintsHandler.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/ManualConstraintsHandler.java b/src/joshua/decoder/chart_parser/ManualConstraintsHandler.java
deleted file mode 100644
index baed984..0000000
--- a/src/joshua/decoder/chart_parser/ManualConstraintsHandler.java
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.tm.Grammar;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.segment_file.ConstraintRule;
-import joshua.decoder.segment_file.ConstraintSpan;
-
-/**
- * @author Zhifei Li, <zh...@gmail.com>
- */
-
-public class ManualConstraintsHandler {
-
- // TODO: each span only has one ConstraintSpan
- // contain spans that have LHS or RHS constraints (they are always hard)
- private HashMap<String, ConstraintSpan> constraintSpansForFiltering;
-
- // contain spans that have hard "rule" constraint; key: start_span; value:
- // end_span
- private ArrayList<Span> spansWithHardRuleConstraint;
-
- private Chart chart;
- private Grammar grammarForConstructManualRule;
-
- private static final Logger logger = Logger.getLogger(ManualConstraintsHandler.class.getName());
-
- public ManualConstraintsHandler(Chart chart, Grammar grammarForConstructManualRule,
- List<ConstraintSpan> constraintSpans) {
- this.chart = chart;
- this.grammarForConstructManualRule = grammarForConstructManualRule;
- initialize(constraintSpans);
- }
-
- private void initialize(List<ConstraintSpan> constraintSpans) {
- /**
- * Note that manual constraints or OOV handling is not part of seeding
- * */
- /**
- * (1) add manual rule (only allow flat rules) into the chart as constraints (2) add RHS or LHS
- * constraint into constraintSpansForFiltering (3) add span signature into
- * setOfSpansWithHardRuleConstraint; if the span contains a hard "RULE" constraint
- */
- if (null != constraintSpans) {
-
- for (ConstraintSpan cSpan : constraintSpans) {
- if (null != cSpan.rules()) {
- boolean shouldAdd = false; // contain LHS or RHS constraints?
- for (ConstraintRule cRule : cSpan.rules()) {
- /**
- * Note that LHS and RHS constraints are always hard, while Rule constraint can be soft
- * or hard
- **/
- switch (cRule.type()) {
- case RULE:
- // == prepare the feature scores
- // TODO: this require the input always specify the right number of
- // features
- float[] featureScores = new float[cRule.features().length];
-
- for (int i = 0; i < featureScores.length; i++) {
- if (cSpan.isHard()) {
- featureScores[i] = 0; // force the feature cost as zero
- } else {
- featureScores[i] = cRule.features()[i];
- }
- }
-
- /**
- * If the RULE constraint is hard, then we should filter all out all consituents
- * (within this span), which are contructed from regular grammar
- */
- if (cSpan.isHard()) {
- if (null == this.spansWithHardRuleConstraint) {
- this.spansWithHardRuleConstraint = new ArrayList<Span>();
- }
- this.spansWithHardRuleConstraint.add(new Span(cSpan.start(), cSpan.end()));
- }
-
- int arity = 0; // only allow flat rule (i.e. arity=0)
- Rule rule =
- this.grammarForConstructManualRule.constructManualRule(
- Vocabulary.id(cRule.lhs()), Vocabulary.addAll(cRule.foreignRhs()),
- Vocabulary.addAll(cRule.nativeRhs()), featureScores, arity);
-
- // add to the chart
- chart.addAxiom(cSpan.start(), cSpan.end(), rule, new SourcePath());
- if (logger.isLoggable(Level.INFO))
- logger.info("Adding RULE constraint for span " + cSpan.start() + ", "
- + cSpan.end() + "; isHard=" + cSpan.isHard() + rule.getLHS());
- break;
-
- default:
- shouldAdd = true;
- }
- }
- if (shouldAdd) {
- if (logger.isLoggable(Level.INFO))
- logger.info("Adding LHS or RHS constraint for span " + cSpan.start() + ", "
- + cSpan.end());
- if (null == this.constraintSpansForFiltering) {
- this.constraintSpansForFiltering = new HashMap<String, ConstraintSpan>();
- }
- this.constraintSpansForFiltering.put(getSpanSignature(cSpan.start(), cSpan.end()),
- cSpan);
- }
- }
- }
- }
-
- }
-
- // ===============================================================
- // Manual constraint annotation methods and classes
- // ===============================================================
-
- /**
- * if there are any LHS or RHS constraints for a span, then all the applicable grammar rules in
- * that span will have to pass the filter.
- */
- public List<Rule> filterRules(int i, int j, List<Rule> rulesIn) {
- if (null == this.constraintSpansForFiltering) return rulesIn;
- ConstraintSpan cSpan = this.constraintSpansForFiltering.get(getSpanSignature(i, j));
- if (null == cSpan) { // no filtering
- return rulesIn;
- } else {
-
- List<Rule> rulesOut = new ArrayList<Rule>();
- for (Rule gRule : rulesIn) {
- // gRule will survive, if any constraint (LHS or RHS) lets it survive
- for (ConstraintRule cRule : cSpan.rules()) {
- if (shouldSurvive(cRule, gRule)) {
- rulesOut.add(gRule);
- break;
- }
- }
- }
- return rulesOut;
- }
- }
-
- /**
- * should we filter out the gRule based on the manually provided constraint cRule
- */
- public boolean shouldSurvive(ConstraintRule cRule, Rule gRule) {
-
- switch (cRule.type()) {
- case LHS:
- return (gRule.getLHS() == Vocabulary.id(cRule.lhs()));
- case RHS:
- int[] targetWords = Vocabulary.addAll(cRule.nativeRhs());
-
- if (targetWords.length != gRule.getEnglish().length) return false;
-
- for (int t = 0; t < targetWords.length; t++) {
- if (targetWords[t] != gRule.getEnglish()[t]) return false;
- }
-
- return true;
- default: // not surviving
- return false;
- }
- }
-
- /**
- * if a span is *within* the coverage of a *hard* rule constraint, then this span will be only
- * allowed to use the mannual rules
- */
- public boolean containHardRuleConstraint(int startSpan, int endSpan) {
- if (null != this.spansWithHardRuleConstraint) {
- for (Span span : this.spansWithHardRuleConstraint) {
- if (startSpan >= span.startPos && endSpan <= span.endPos) return true;
- }
- }
- return false;
- }
-
- private String getSpanSignature(int i, int j) {
- return i + " " + j;
- }
-
- private static class Span {
-
- int startPos;
- int endPos;
-
- public Span(int startPos, int endPos) {
- this.startPos = startPos;
- this.endPos = endPos;
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/SourcePath.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/SourcePath.java b/src/joshua/decoder/chart_parser/SourcePath.java
deleted file mode 100644
index b1fbe09..0000000
--- a/src/joshua/decoder/chart_parser/SourcePath.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import joshua.decoder.segment_file.Token;
-import joshua.lattice.Arc;
-
-/**
- * This class represents information about a path taken through the source lattice.
- *
- * @note This implementation only tracks the source path cost which is assumed to be a scalar value.
- * If you need multiple values, or want to recover more detailed path statistics, you'll need
- * to update this code.
- */
-public class SourcePath {
-
- private final float pathCost;
-
- public SourcePath() {
- pathCost = 0.0f;
- }
-
- private SourcePath(float cost) {
- pathCost = cost;
- }
-
- public float getPathCost() {
- return pathCost;
- }
-
- public SourcePath extend(Arc<Token> srcEdge) {
- float tcost = (float) srcEdge.getCost();
- if (tcost == 0.0)
- return this;
- else
- return new SourcePath(pathCost + (float) srcEdge.getCost());
- }
-
- public SourcePath extendNonTerminal() {
- return this;
- }
-
- public String toString() {
- return "SourcePath.cost=" + pathCost;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/StateConstraint.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/StateConstraint.java b/src/joshua/decoder/chart_parser/StateConstraint.java
deleted file mode 100644
index e17cee0..0000000
--- a/src/joshua/decoder/chart_parser/StateConstraint.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.Collection;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.state_maintenance.NgramDPState;
-
-/**
- * This class provides constraints on the sorts of states that are permitted in the chart. Its
- * original motivation was to be used as a means of doing forced decoding, which is accomplished by
- * forcing all n-gram states that are created to match the target string.
- *
- * @author Matt Post <po...@cs.jhu.edu>
- *
- */
-public class StateConstraint {
- private String target = null;
-
- public StateConstraint(String target) {
- this.target = " <s> " + target + " </s> ";
- }
-
- /**
- * Determines if all of the states passed in are legal in light of the input that was passed
- * earlier. Currently only defined for n-gram states.
- *
- * @param dpStates
- * @return whether the states are legal in light of the target side sentence
- */
- public boolean isLegal(Collection<DPState> dpStates) {
- /*
- * Iterate over all the state-contributing objects associated with the new state, querying
- * n-gram ones (of which there is probably only one), allowing them to veto the move.
- */
- for (DPState dpState : dpStates) {
- if (dpState instanceof NgramDPState) {
- // Build a regular expression out of the state context.
- String leftWords = " "
- + Vocabulary.getWords(((NgramDPState) dpState).getLeftLMStateWords()) + " ";
- String rightWords = " "
- + Vocabulary.getWords(((NgramDPState) dpState).getRightLMStateWords()) + " ";
-
- int leftPos = this.target.indexOf(leftWords);
- int rightPos = this.target.lastIndexOf(rightWords);
-
- boolean legal = (leftPos != -1 && leftPos <= rightPos);
-// System.err.println(String.format(" isLegal(%s @ %d,%s @ %d) = %s", leftWords, leftPos,
-// rightWords, rightPos, legal));
-
- return legal;
- }
- }
-
- return true;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/SuperNode.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/SuperNode.java b/src/joshua/decoder/chart_parser/SuperNode.java
deleted file mode 100644
index 6ed4bcd..0000000
--- a/src/joshua/decoder/chart_parser/SuperNode.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.chart_parser;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import joshua.decoder.hypergraph.HGNode;
-
-/**
- * Represents a list of items in the hypergraph that have the same left-hand side but may have
- * different LM states.
- *
- * @author Zhifei Li
- */
-class SuperNode {
-
- /** Common left-hand side state. */
- final int lhs;
-
- /**
- * List of hypergraph nodes, each of which has its own language model state.
- */
- final List<HGNode> nodes;
-
- /**
- * All nodes in a SuperNode have the same start and end points, so we pick the first one and
- * return it.
- *
- * @return
- */
- public int end() {
- return nodes.get(0).j;
- }
-
-
- /**
- * Constructs a super item defined by a common left-hand side.
- *
- * @param lhs Left-hand side token
- */
- public SuperNode(int lhs) {
- this.lhs = lhs;
- this.nodes = new ArrayList<HGNode>();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/chart_parser/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/chart_parser/package.html b/src/joshua/decoder/chart_parser/package.html
deleted file mode 100644
index d7ca8f6..0000000
--- a/src/joshua/decoder/chart_parser/package.html
+++ /dev/null
@@ -1,23 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides an implementation of a hierarchical phrase-based decoder for statistical machine translation.
-
-<h2>Related Documentation</h2>
-
-<ul>
- <li>The code in this package is based largely on algorithms from Chiang (2007).
-</ul>
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/ArityPhrasePenalty.java b/src/joshua/decoder/ff/ArityPhrasePenalty.java
deleted file mode 100644
index 8223899..0000000
--- a/src/joshua/decoder/ff/ArityPhrasePenalty.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.ff;
-
-import java.util.List;
-
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.segment_file.Sentence;
-import joshua.decoder.chart_parser.SourcePath;
-import joshua.corpus.Vocabulary;
-
-/**
- * This feature function counts rules from a particular grammar (identified by the owner) having an
- * arity within a specific range. It expects three parameters upon initialization: the owner, the
- * minimum arity, and the maximum arity.
- *
- * @author Matt Post <post@cs.jhu.edu
- * @author Zhifei Li <zh...@gmail.com>
- */
-public class ArityPhrasePenalty extends StatelessFF {
-
- // when the rule.arity is in the range, then this feature is activated
- private final int owner;
- private final int minArity;
- private final int maxArity;
-
- public ArityPhrasePenalty(final FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "ArityPenalty", args, config);
-
- this.owner = Vocabulary.id(parsedArgs.get("owner"));
- this.minArity = Integer.parseInt(parsedArgs.get("min-arity"));
- this.maxArity = Integer.parseInt(parsedArgs.get("max-arity"));
- }
-
- /**
- * Returns 1 if the arity penalty feature applies to the current rule.
- */
- private int isEligible(final Rule rule) {
- if (this.owner == rule.getOwner() && rule.getArity() >= this.minArity
- && rule.getArity() <= this.maxArity)
- return 1;
-
- return 0;
- }
-
- @Override
- public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
- Sentence sentence, Accumulator acc) {
- acc.add(name, isEligible(rule));
-
- return null;
- }
-}