You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:26:55 UTC
[39/66] [partial] incubator-joshua git commit: JOSHUA-252 Make it
possible to use Maven to build Joshua
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/TrivialInsideOutside.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/TrivialInsideOutside.java b/src/joshua/decoder/hypergraph/TrivialInsideOutside.java
deleted file mode 100644
index f6f164f..0000000
--- a/src/joshua/decoder/hypergraph/TrivialInsideOutside.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.hypergraph;
-
-/**
- * @author Zhifei Li, <zh...@gmail.com>
- * @version $LastChangedDate$
- */
-
-public class TrivialInsideOutside extends DefaultInsideOutside {
- // used by inside-outside estimation
- protected double getHyperedgeLogProb(HyperEdge dt, HGNode parent_it) {
- return dt.getTransitionLogP(false);// TODO this is very bad in terms of computation
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/ViterbiExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/ViterbiExtractor.java b/src/joshua/decoder/hypergraph/ViterbiExtractor.java
deleted file mode 100644
index 31c8dc0..0000000
--- a/src/joshua/decoder/hypergraph/ViterbiExtractor.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.hypergraph;
-
-import static java.util.Collections.emptyList;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.FeatureVector;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
- */
-
-public class ViterbiExtractor {
-
- /**
- * This function recursively visits the nodes of the Viterbi derivation in a depth-first
- * traversal, applying the walker to each of the nodes. It provides a more general framework for
- * implementing operations on a tree.
- *
- * @param node the node to start viterbi traversal from
- * @param walker an implementation of the WalkerFunction interface, to be applied to each node in
- * the tree
- * @param nodeIndex the tail node index of the given node. This allows implementations of the
- * WalkerFunction to associate nonTerminals with the index of node in the outgoing edges
- * list of tail nodes.
- */
- public static void viterbiWalk(
- final HGNode node,
- final WalkerFunction walker,
- final int nodeIndex) {
- // apply the walking function to the node
- walker.apply(node, nodeIndex);
- // recurse on the anterior nodes of the best hyperedge in source order
- final HyperEdge bestEdge = node.bestHyperedge;
- final List<HGNode> tailNodes = bestEdge.getTailNodes();
- if (tailNodes != null) {
- for (int tailNodeIndex = 0; tailNodeIndex < tailNodes.size(); tailNodeIndex++) {
- viterbiWalk(tailNodes.get(tailNodeIndex), walker, tailNodeIndex);
- }
- }
- }
-
- public static void viterbiWalk(final HGNode node, final WalkerFunction walker) {
- viterbiWalk(node, walker, 0);
- }
-
- /**
- * Returns the Viterbi translation of the Hypergraph (includes sentence markers)
- */
- public static String getViterbiString(final HyperGraph hg) {
- if (hg == null)
- return "";
-
- final WalkerFunction viterbiOutputStringWalker = new OutputStringExtractor(false);
- viterbiWalk(hg.goalNode, viterbiOutputStringWalker);
- return viterbiOutputStringWalker.toString();
- }
-
- /**
- * Returns the Viterbi feature vector
- */
- public static FeatureVector getViterbiFeatures(
- final HyperGraph hg,
- final List<FeatureFunction> featureFunctions,
- final Sentence sentence) {
- if (hg == null)
- return new FeatureVector();
-
- final FeatureVectorExtractor extractor = new FeatureVectorExtractor(
- featureFunctions, sentence);
- viterbiWalk(hg.goalNode, extractor);
- return extractor.getFeatures();
- }
-
- /**
- * Returns the Viterbi Word Alignments as String.
- */
- public static String getViterbiWordAlignments(final HyperGraph hg) {
- if (hg == null)
- return "";
-
- final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
- viterbiWalk(hg.goalNode, wordAlignmentWalker);
- return wordAlignmentWalker.toString();
- }
-
- /**
- * Returns the Viterbi Word Alignments as list of lists (target-side).
- */
- public static List<List<Integer>> getViterbiWordAlignmentList(final HyperGraph hg) {
- if (hg == null)
- return emptyList();
-
- final WordAlignmentExtractor wordAlignmentWalker = new WordAlignmentExtractor();
- viterbiWalk(hg.goalNode, wordAlignmentWalker);
- return wordAlignmentWalker.getFinalWordAlignments();
- }
-
- /** find 1best hypergraph */
- public static HyperGraph getViterbiTreeHG(HyperGraph hg_in) {
- HyperGraph res =
- new HyperGraph(cloneNodeWithBestHyperedge(hg_in.goalNode), -1, -1, null);
- // TODO: number of items/deductions
- get1bestTreeNode(res.goalNode);
- return res;
- }
-
- private static void get1bestTreeNode(HGNode it) {
- HyperEdge dt = it.bestHyperedge;
- if (null != dt.getTailNodes()) {
- for (int i = 0; i < dt.getTailNodes().size(); i++) {
- HGNode antNode = dt.getTailNodes().get(i);
- HGNode newNode = cloneNodeWithBestHyperedge(antNode);
- dt.getTailNodes().set(i, newNode);
- get1bestTreeNode(newNode);
- }
- }
- }
-
- // TODO: tbl_states
- private static HGNode cloneNodeWithBestHyperedge(HGNode inNode) {
- List<HyperEdge> hyperedges = new ArrayList<HyperEdge>(1);
- HyperEdge cloneEdge = cloneHyperedge(inNode.bestHyperedge);
- hyperedges.add(cloneEdge);
- return new HGNode(inNode.i, inNode.j, inNode.lhs, hyperedges, cloneEdge, inNode.getDPStates());
- }
-
-
- private static HyperEdge cloneHyperedge(HyperEdge inEdge) {
- List<HGNode> antNodes = null;
- if (null != inEdge.getTailNodes()) {
- antNodes = new ArrayList<HGNode>(inEdge.getTailNodes());// copied because the tail-node list will be changed in
- // get1bestTreeNode
- }
- HyperEdge res =
- new HyperEdge(inEdge.getRule(), inEdge.getBestDerivationScore(), inEdge.getTransitionLogP(false),
- antNodes, inEdge.getSourcePath());
- return res;
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/WalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WalkerFunction.java b/src/joshua/decoder/hypergraph/WalkerFunction.java
deleted file mode 100644
index 65bffbf..0000000
--- a/src/joshua/decoder/hypergraph/WalkerFunction.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.hypergraph;
-
-/**
- * Classes implementing this interface define a single function that is applied to each node. This
- * interface is used for various walkers (ViterbiExtractor).
- */
-public interface WalkerFunction {
-
- /**
- * Function that is applied to node at tail node index nodeIndex.
- * nodeIndex indicates the index of node in the list of tailnodes for the
- * outgoing edge.
- */
- void apply(HGNode node, int nodeIndex);
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java b/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
deleted file mode 100644
index 837c69f..0000000
--- a/src/joshua/decoder/hypergraph/WordAlignmentExtractor.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.hypergraph;
-
-import static java.util.Collections.emptyList;
-
-import java.util.List;
-import java.util.Stack;
-
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.hypergraph.KBestExtractor.DerivationState;
-import joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
-
-/**
- * This class enables extraction of word-level alignments from hypotheses.
- * It implements two interfaces, WalkerFunction and DerivationVisitor.
- * The former is for using the Viterbi walk function, the latter is for
- * k-best extraction.
- * Intermediate WordAlignmentStates are placed on a stack and/or merged down
- * if possible.
- * @author fhieber
- */
-public class WordAlignmentExtractor implements WalkerFunction, DerivationVisitor {
-
- private final Stack<WordAlignmentState> stack = new Stack<WordAlignmentState>();
-
- /**
- * Merges a state with the top of the stack if applicable or places it on top of the stack.
- */
- private void merge(final WordAlignmentState state) {
- // if alignment state has no NTs left AND stack is not empty
- // and parent state on stack still needs something to substitute
- if (!stack.isEmpty()
- && state.isComplete()) {
- final WordAlignmentState parentState = stack.pop();
- if (parentState.isComplete()) {
- throw new IllegalStateException("Parent state already complete");
- }
- parentState.substituteIn(state);
- merge(parentState);
- } else {
- stack.add(state);
- }
- }
-
- /**
- * Common entry point for WalkerFunction and DerivationVisitor.
- */
- private void extract(final Rule rule, final int spanStart) {
- if (rule != null) {
- merge(new WordAlignmentState(rule, spanStart));
- }
- }
-
- /**
- * entry for Viterbi walker. Calls word alignment extraction
- * for best hyperedge from given node.
- */
- @Override
- public void apply(HGNode node, int nodeIndex) {
- extract(node.bestHyperedge.getRule(), node.i);
- }
-
- /**
- * Visiting a node during k-best extraction is the same as
- * apply() for Viterbi extraction but using the edge from
- * the Derivation state.
- */
- @Override
- public void before(final DerivationState state, final int level, int tailNodeIndex) {
- extract(state.edge.getRule(), state.parentNode.i);
- }
-
- /**
- * Nothing to do after visiting a node.
- */
- @Override
- public void after(final DerivationState state, final int level, int tailNodeIndex) {}
-
- /**
- * Final word alignment without sentence markers
- * or empty list if stack is empty.
- */
- public List<List<Integer>> getFinalWordAlignments() {
- if (stack.isEmpty()) {
- return emptyList();
- }
-
- if (stack.size() != 1) {
- throw new RuntimeException(
- String.format(
- "Stack of WordAlignmentExtractor should contain only a single (last) element, but was size %d", stack.size()));
- }
-
- return stack.peek().toFinalList();
- }
-
- /**
- * Returns a String representation of the (final) word alignment
- * state on top of the stack.
- * Empty string for empty stack.
- */
- @Override
- public String toString() {
- if (stack.isEmpty()) {
- return "";
- }
-
- if (stack.size() != 1) {
- throw new RuntimeException(
- String.format(
- "Stack of WordAlignmentExtractor should contain only a single (last) element, but was size %d", stack.size()));
- }
-
- return stack.peek().toFinalString();
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/WordAlignmentState.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/WordAlignmentState.java b/src/joshua/decoder/hypergraph/WordAlignmentState.java
deleted file mode 100644
index 258e062..0000000
--- a/src/joshua/decoder/hypergraph/WordAlignmentState.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.hypergraph;
-
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.ListIterator;
-import java.util.Map;
-
-import joshua.decoder.ff.tm.Rule;
-
-/**
- * This class encodes a derivation state in terms of a list of alignment points.
- * Whenever a child instance is substituted into the parent instance, we need to
- * adjust source indexes of the alignments.
- *
- * @author fhieber
- */
-public class WordAlignmentState {
-
- /**
- * each element in this list corresponds to a token on the target side of the
- * rule. The values of the elements correspond to the aligned source token on
- * the source side of the rule.
- */
- private LinkedList<AlignedSourceTokens> trgPoints;
- private int srcStart;
- /** number of NTs we need to substitute. */
- private int numNT;
- /** grows with substitutions of child rules. Reaches original Rule span if substitutions are complete */
- private int srcLength;
-
- /**
- * construct AlignmentState object from a virgin Rule and its source span.
- * Determines if state is complete (if no NT present)
- */
- WordAlignmentState(Rule rule, int start) {
- trgPoints = new LinkedList<AlignedSourceTokens>();
- srcLength = rule.getFrench().length;
- numNT = rule.getArity();
- srcStart = start;
- Map<Integer, List<Integer>> alignmentMap = rule.getAlignmentMap();
- int[] nonTermPositions = rule.getNonTerminalSourcePositions();
- int[] trg = rule.getEnglish();
- // for each target index, create a TargetAlignmentPoint
- for (int trgIndex = 0; trgIndex < trg.length; trgIndex++) {
- AlignedSourceTokens trgPoint = new AlignedSourceTokens();
-
- if (trg[trgIndex] >= 0) { // this is a terminal symbol, check for alignment
- if (alignmentMap.containsKey(trgIndex)) {
- // add source indexes to TargetAlignmentPoint
- for (int srcIdx : alignmentMap.get(trgIndex)) {
- trgPoint.add(srcStart + srcIdx);
- }
- } else { // this target word is NULL-aligned
- trgPoint.setNull();
- }
- } else { // this is a nonterminal ([X]); its value is actually the (negative) index of the NT in the source
- trgPoint.setNonTerminal();
- trgPoint.add(srcStart + nonTermPositions[Math.abs(trg[trgIndex]) - 1]);
- }
- trgPoints.add(trgPoint);
- }
- }
-
- /**
- * if there are no more NonTerminals to substitute,
- * this state is said to be complete
- */
- public boolean isComplete() {
- return numNT == 0;
- }
-
- /**
- * builds the final alignment string in the standard alignment format: src -
- * trg. Sorted by trg indexes. Disregards the sentence markers.
- */
- public String toFinalString() {
- StringBuilder sb = new StringBuilder();
- int t = 0;
- for (AlignedSourceTokens pt : trgPoints) {
- for (int s : pt)
- sb.append(String.format(" %d-%d", s-1, t-1)); // disregard sentence
- // markers
- t++;
- }
- String result = sb.toString();
- if (!result.isEmpty())
- return result.substring(1);
- return result;
- }
-
- /**
- * builds the final alignment list.
- * each entry in the list corresponds to a list of aligned source tokens.
- * First and last item in trgPoints is skipped.
- */
- public List<List<Integer>> toFinalList() {
- assert (isComplete() == true);
- List<List<Integer>> alignment = new ArrayList<List<Integer>> ();
- if (trgPoints.isEmpty())
- return alignment;
- ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
- it.next(); // skip first item (sentence marker)
- while (it.hasNext()) {
- AlignedSourceTokens alignedSourceTokens = it.next();
- if (it.hasNext()) { // if not last element in trgPoints
- List<Integer> newAlignedSourceTokens = new ArrayList<Integer>();
- for (Integer sourceIndex : alignedSourceTokens)
- newAlignedSourceTokens.add(sourceIndex - 1); // shift by one to disregard sentence marker
- alignment.add(newAlignedSourceTokens);
- }
- }
- return alignment;
- }
-
- /**
- * String representation for debugging.
- */
- public String toString() {
- return String.format("%s , len=%d start=%d, isComplete=%s",
- trgPoints.toString(), srcLength, srcStart, this.isComplete());
- }
-
- /**
- * substitutes a child WordAlignmentState into this instance at the first
- * NT it finds. Also shifts the indices in this instance by the span/width of the
- * child that is to be substituted.
- * Substitution order is determined by the source-first traversal through the hypergraph.
- */
- void substituteIn(WordAlignmentState child) {
- // update existing indexes by length of child (has no effect on NULL and
- // NonTerminal points)
- for (AlignedSourceTokens trgPoint : trgPoints)
- trgPoint.shiftBy(child.srcStart, child.srcLength - 1);
-
- // now substitute in the child at first NT, modifying the list
- ListIterator<AlignedSourceTokens> it = trgPoints.listIterator();
- while (it.hasNext()) {
- AlignedSourceTokens trgPoint = it.next();
- if (trgPoint.isNonTerminal()) { // found first NT
- it.remove(); // remove NT symbol
- for (AlignedSourceTokens childElement : child.trgPoints) {
- childElement.setFinal(); // child source indexes are final, do not change them anymore
- it.add(childElement);
- }
- this.srcLength += child.srcLength - 1; // -1 (NT)
- this.numNT--;
- break;
- }
- }
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/hypergraph/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/hypergraph/package.html b/src/joshua/decoder/hypergraph/package.html
deleted file mode 100644
index 6fdd043..0000000
--- a/src/joshua/decoder/hypergraph/package.html
+++ /dev/null
@@ -1,18 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides implementations of hypergraph data structures and related algorithms
-used in extracting translation results in hierarchical phrase-based translation.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/io/DeNormalize.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/DeNormalize.java b/src/joshua/decoder/io/DeNormalize.java
deleted file mode 100644
index 328e01b..0000000
--- a/src/joshua/decoder/io/DeNormalize.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.io;
-
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Denormalize a(n English) string in a collection of ways listed below.
- * <UL>
- * <LI>Capitalize the first character in the string</LI>
- * <LI>Detokenize</LI>
- * <UL>
- * <LI>Delete whitespace in front of periods and commas</LI>
- * <LI>Join contractions</LI>
- * <LI>Capitalize name titles (Mr Ms Miss Dr etc.)</LI>
- * <LI>TODO: Handle surrounding characters ([{<"''">}])</LI>
- * <LI>TODO: Join multi-period abbreviations (e.g. M.Phil. i.e.)</LI>
- * <LI>TODO: Handle ambiguities like "st.", which can be an abbreviation for both "Saint" and
- * "street"</LI>
- * <LI>TODO: Capitalize both the title and the name of a person, e.g. Mr. Morton (named entities
- * should be demarcated).</LI>
- * </UL>
- * </UL> <b>N.B.</b> These methods all assume that every translation result that will be
- * denormalized has the following format:
- * <UL>
- * <LI>There is only one space between every pair of tokens</LI>
- * <LI>There is no whitespace before the first token</LI>
- * <LI>There is no whitespace after the final token</LI>
- * <LI>Standard spaces are the only type of whitespace</LI>
- * </UL>
- * </UL>
- */
-
-public class DeNormalize {
-
- /**
- * Apply all the denormalization methods to the normalized input line.
- *
- * @param normalized
- * @return
- */
- public static String processSingleLine(String normalized) {
- // The order in which the methods are applied could matter in some situations. E.g., a token to
- // be matched is "phd", but if it is the first token in the line, it might have already been
- // capitalized to "Phd" by the capitalizeFirstLetter method, and because the "phd" token won't
- // match, "Phd" won't be corrected to "PhD".
- String deNormalized = normalized;
- deNormalized = capitalizeNameTitleAbbrvs(deNormalized);
- deNormalized = replaceBracketTokens(deNormalized);
- deNormalized = joinPunctuationMarks(deNormalized);
- deNormalized = joinHyphen(deNormalized);
- deNormalized = joinContractions(deNormalized);
- deNormalized = capitalizeLineFirstLetter(deNormalized);
- return deNormalized;
- }
-
- /**
- * Capitalize the first letter of a line. This should be the last denormalization step applied to
- * a line.
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String capitalizeLineFirstLetter(String line) {
- String result = null;
- Pattern regexp = Pattern.compile("[^\\p{Punct}\\p{Space}¡¿]");
- Matcher matcher = regexp.matcher(line);
- if (matcher.find()) {
- String match = matcher.group(0);
- result = line.replaceFirst(match, match.toUpperCase());
- } else {
- result = line;
- }
- return result;
- }
-
- /**
- * Scanning from left-to-right, a comma or period preceded by a space will become just the
- * comma/period.
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String joinPunctuationMarks(String line) {
- String result = line;
- result = result.replace(" ,", ",");
- result = result.replace(" ;", ";");
- result = result.replace(" :", ":");
- result = result.replace(" .", ".");
- result = result.replace(" !", "!");
- result = result.replace("¡ ", "¡");
- result = result.replace(" ?", "?");
- result = result.replace("¿ ", "¿");
- result = result.replace(" )", ")");
- result = result.replace(" ]", "]");
- result = result.replace(" }", "}");
- result = result.replace("( ", "(");
- result = result.replace("[ ", "[");
- result = result.replace("{ ", "{");
- return result;
- }
-
- /**
- * Scanning from left-to-right, a hyphen surrounded by a space before and after it will become
- * just the hyphen.
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String joinHyphen(String line) {
- return line.replace(" - ", "-");
- }
-
- /**
- * Scanning the line from left-to-right, a contraction suffix preceded by a space will become just
- * the contraction suffix. <br>
- * <br>
- * I.e., the preceding space will be deleted, joining the prefix to the suffix. <br>
- * <br>
- * E.g.
- *
- * <pre>wo n't</pre>
- *
- * becomes
- *
- * <pre>won't</pre>
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String joinContractions(String line) {
- String result = line;
- for (String suffix : new String[] {"'d", "'ll", "'m", "n't", "'re", "'s", "'ve",}) {
- result = result.replace(" " + suffix, suffix);
- }
- return result;
- }
-
- /**
- * Capitalize the first character of the titles of names: Mr Mrs Ms Miss Dr Prof
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String capitalizeNameTitleAbbrvs(String line) {
- String result = line;
-
- // Capitalize only the first character of certain name titles.
- for (String title : new String[] {"dr", "miss", "mr", "mrs", "ms", "prof"}) {
- result =
- result.replaceAll("\\b" + title + "\\b",
- Character.toUpperCase(title.charAt(0)) + title.substring(1));
- }
- // Capitalize the relevant characters of certain name titles.
- result = result.replaceAll("\\b" + "phd" + "\\b", "PhD");
- result = result.replaceAll("\\b" + "mphil" + "\\b", "MPhil");
- return result;
- }
-
- public static String capitalizeI(String line) {
- // Capitalize every standalone lowercase "i" token.
- return line.replaceAll("\\b" + "i" + "\\b", "I");
- }
-
- /**
- * Case-insensitively replace all of the character sequences that represent a bracket character.
- *
- * Keys are token representations of abbreviations of titles for names that capitalize more than
- * just the first letter.<br>
- * Bracket token sequences: -lrb- -rrb- -lsb- -rsb- -lcb- -rcb- <br>
- * <br>
- * See http://www.cis.upenn.edu/~treebank/tokenization.html
- *
- * @param line The single-line input string
- * @return The input string modified as described above
- */
- public static String replaceBracketTokens(String line) {
- String result = line;
- result = result.replaceAll("(?iu)" + "-lrb-", "(");
- result = result.replaceAll("(?iu)" + "-rrb-", ")");
- result = result.replaceAll("(?iu)" + "-lsb-", "[");
- result = result.replaceAll("(?iu)" + "-rsb-", "]");
- result = result.replaceAll("(?iu)" + "-lcb-", "{");
- result = result.replaceAll("(?iu)" + "-rcb-", "}");
- return result;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/JSONMessage.java b/src/joshua/decoder/io/JSONMessage.java
deleted file mode 100644
index 2733db4..0000000
--- a/src/joshua/decoder/io/JSONMessage.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.io;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-
-import joshua.decoder.Translation;
-
-public class JSONMessage {
- public Data data = null;
- public List<String> rules = null;
-
- public JSONMessage() {
- }
-
- public class Data {
- public List<TranslationItem> translations;
-
- public Data() {
- translations = new ArrayList<TranslationItem>();
- }
- }
-
- public TranslationItem addTranslation(String text) {
- if (data == null)
- data = new Data();
-
- TranslationItem newItem = new TranslationItem(text);
- data.translations.add(newItem);
- return newItem;
- }
-
- public class TranslationItem {
- public String translatedText;
- public List<NBestItem> raw_nbest;
-
- public TranslationItem(String value) {
- this.translatedText = value;
- this.raw_nbest = new ArrayList<NBestItem>();
- }
-
- public void addHypothesis(String hyp, float score) {
- this.raw_nbest.add(new NBestItem(hyp, score));
- }
- }
-
- public class NBestItem {
- public String hyp;
- public float totalScore;
-
- public NBestItem(String hyp, float score) {
- this.hyp = hyp;
- this.totalScore = score;
- }
- }
-
- public void addRule(String rule) {
- if (rules == null)
- rules = new ArrayList<String>();
- rules.add(rule);
- }
-
- public class MetaData {
-
- public MetaData() {
- }
- }
-
- public static JSONMessage buildMessage(Translation translation) {
- JSONMessage message = new JSONMessage();
- String[] results = translation.toString().split("\\n");
- if (results.length > 0) {
- JSONMessage.TranslationItem item = message.addTranslation(translation.getStructuredTranslation().getTranslationString());
-
- for (String result: results) {
- String[] tokens = result.split(" \\|\\|\\| ");
- String rawResult = tokens[1];
- float score = Float.parseFloat(tokens[3]);
- item.addHypothesis(rawResult, score);
- }
- }
- return message;
- }
-
- public String toString() {
- Gson gson = new GsonBuilder().setPrettyPrinting().create();
- return gson.toJson(this);
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/io/TranslationRequestStream.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/io/TranslationRequestStream.java b/src/joshua/decoder/io/TranslationRequestStream.java
deleted file mode 100644
index 47f5d81..0000000
--- a/src/joshua/decoder/io/TranslationRequestStream.java
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.io;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.Reader;
-
-import com.google.gson.stream.JsonReader;
-
-import joshua.decoder.JoshuaConfiguration;
-import joshua.decoder.JoshuaConfiguration.INPUT_TYPE;
-import joshua.decoder.MetaDataException;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * This class iterates over an input stream, looking for inputs to translate. By default, it
- * expects plain-text input, which can be plain sentences or PLF-encoded lattices. If
- * '-input-type json' is passed to the decoder, it will instead read JSON objects from the input
- * stream, with the following format:
- *
- * {
- * "data": {
- * "translations": [
- * { "sourceText": "sentence to be translated" },
- * { "sourceText": "next sentence" },
- * { "sourceText": "@some command to run" }
- * ]
- * }
- * }
- *
- * @author Matt Post <po...@cs.jhu.edu>
- * @author orluke
- */
-public class TranslationRequestStream {
-  private final JoshuaConfiguration joshuaConfiguration;
-
-  /* Number of the most recently returned sentence; -1 until one has been read. */
-  private int sentenceNo = -1;
-
-  private Sentence nextSentence = null;
-
-  /* Reads plain text or JSON, depending on the configured input type. */
-  private StreamHandler requestHandler = null;
-
-  /* Whether the request has been killed by a broken client connection. */
-  private volatile boolean isShutDown = false;
-
-  /**
-   * Chooses the JSON handler when the configured input type is json and the plain-text
-   * handler for every other input type.
-   *
-   * @param reader the input to read requests from
-   * @param joshuaConfiguration the decoder configuration, also handed to each Sentence
-   */
-  public TranslationRequestStream(BufferedReader reader, JoshuaConfiguration joshuaConfiguration) {
-    this.joshuaConfiguration = joshuaConfiguration;
-    this.requestHandler = (joshuaConfiguration.input_type == INPUT_TYPE.json)
-        ? new JSONStreamHandler(reader)
-        : new PlaintextStreamHandler(reader);
-  }
-
-  /* Source of sentences; next() yields null once the input is exhausted. */
-  private interface StreamHandler {
-    Sentence next() throws IOException, MetaDataException;
-  }
-
-  /* Pulls one "translations" array element per call out of a JSON request. */
-  private class JSONStreamHandler implements StreamHandler {
-
-    private JsonReader reader = null;
-    private String line = null;
-
-    public JSONStreamHandler(Reader in) {
-      reader = new JsonReader(in);
-      try {
-        // Step over the envelope so that the reader sits on the first array element.
-        reader.beginObject();
-        reader.nextName(); // "data"
-        reader.beginObject();
-        reader.nextName(); // "translations"
-        reader.beginArray();
-      } catch (IOException e) {
-        e.printStackTrace();
-      }
-    }
-
-    @Override
-    public Sentence next() throws IOException, MetaDataException {
-      line = null;
-
-      if (reader.hasNext()) {
-        // Each element is an object with a single name/value pair; only the value is kept.
-        reader.beginObject();
-        reader.nextName();
-        line = reader.nextString();
-        reader.endObject();
-      }
-
-      if (line == null) {
-        return null;
-      }
-      if (line.startsWith("@")) {
-        throw new MetaDataException(line);
-      }
-      return new Sentence(line, -1, joshuaConfiguration);
-    }
-  }
-
-  /* Treats every line of the reader as one input. */
-  private class PlaintextStreamHandler implements StreamHandler {
-
-    private BufferedReader reader = null;
-
-    public PlaintextStreamHandler(BufferedReader in) {
-      reader = in;
-    }
-
-    @Override
-    public Sentence next() throws IOException, MetaDataException {
-      final String text = reader.readLine();
-      if (text == null) {
-        return null;
-      }
-      if (text.startsWith("@")) {
-        throw new MetaDataException(text);
-      }
-      return new Sentence(text, sentenceNo, joshuaConfiguration);
-    }
-  }
-
-  /* Number of sentences handed out so far. */
-  public int size() {
-    return 1 + sentenceNo;
-  }
-
-  /*
-   * Fetches the next sentence from the active handler and stamps it with the next sentence
-   * number. Returns null when the stream has been shut down or the input is exhausted; an
-   * IOException from the handler shuts the stream down and also yields null.
-   */
-  public synchronized Sentence next() throws MetaDataException {
-    nextSentence = null;
-
-    if (!isShutDown) {
-      try {
-        nextSentence = requestHandler.next();
-        if (nextSentence != null) {
-          nextSentence.id = ++sentenceNo;
-        }
-      } catch (IOException e) {
-        this.shutdown();
-      }
-    }
-
-    return nextSentence;
-  }
-
-  /**
-   * When the client socket is interrupted, we need to shut things down. On the source side, the
-   * TranslationRequest could easily have buffered a lot of lines and so will keep discovering
-   * sentences to translate, but the output Translation objects will start throwing exceptions
-   * when trying to print to the closed socket. When that happens, this method is called so that
-   * next() stops returning sentences, which in turn stops the caller from asking for more.
-   *
-   * Note that existing DecoderThreads are not shut down. That would be good to do, but for the
-   * moment would require more bookkeeping than we want to do.
-   */
-  public void shutdown() {
-    isShutDown = true;
-  }
-
-  /* True once shutdown() has been called. */
-  public boolean isShutDown() {
-    return isShutDown;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/package.html
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/package.html b/src/joshua/decoder/package.html
deleted file mode 100644
index fda252e..0000000
--- a/src/joshua/decoder/package.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides infrastructure and wrapper code relevant to
-hierarchical phrase-based decoding for statistical machine translation.
-<p>
-This package does not include an implementation of any actual decoding algorithm.
-Rather, such code is in child packages of this package.
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Candidate.java b/src/joshua/decoder/phrase/Candidate.java
deleted file mode 100644
index 4b8b6a6..0000000
--- a/src/joshua/decoder/phrase/Candidate.java
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-/***
- * A candidate is basically a cube prune state. It contains a list of hypotheses and target
- * phrases, and an instantiated candidate is a pair of indices that index these two lists. This
- * is the "cube prune" position.
- */
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import joshua.corpus.Span;
-import joshua.decoder.chart_parser.ComputeNodeResult;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.hypergraph.HGNode;
-
-public class Candidate {
-
-  // hypotheses that can be paired with the phrases gathered from this span
-  private List<Hypothesis> hypotheses;
-
-  // target phrases gathered from one span of the input
-  private TargetPhrases phrases;
-
-  // source span of the new phrase
-  public Span span;
-
-  // future cost of applying phrases to hypotheses
-  float future_delta;
-
-  // { index into hypotheses, index into phrases } -- the cube pruning position
-  private int[] ranks;
-
-  // scoring and state information, supplied later through setResult()
-  private ComputeNodeResult result;
-
-  /**
-   * Creates the candidate at the corner of the cube, i.e. with ranks (0, 0).
-   */
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta) {
-    this(hypotheses, phrases, span, delta, new int[] { 0, 0 });
-  }
-
-  /**
-   * Creates a candidate at an explicit cube position. The ranks array is stored as given,
-   * not copied.
-   */
-  public Candidate(List<Hypothesis> hypotheses, TargetPhrases phrases, Span span, float delta, int[] ranks) {
-    this.hypotheses = hypotheses;
-    this.phrases = phrases;
-    this.span = span;
-    this.future_delta = delta;
-    this.ranks = ranks;
-  }
-
-  /**
-   * Extended candidates are initialized with the very same "phrases", "hypotheses" and "span"
-   * objects as the candidate they came from, so those three are compared by identity and the
-   * ranks are compared element by element.
-   *
-   * This keeps cube pruning from adding the same candidate twice after reaching one point in
-   * the cube along different paths.
-   */
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof Candidate)) {
-      return false;
-    }
-
-    final Candidate that = (Candidate) obj;
-    final boolean sharesBackingObjects =
-        hypotheses == that.hypotheses && phrases == that.phrases && span == that.span;
-    if (!sharesBackingObjects) {
-      return false;
-    }
-
-    final int count = ranks.length;
-    if (count != that.ranks.length) {
-      return false;
-    }
-
-    int index = 0;
-    while (index < count) {
-      if (ranks[index] != that.ranks[index]) {
-        return false;
-      }
-      index++;
-    }
-    return true;
-  }
-
-  @Override
-  public int hashCode() {
-    int hash = 17 * hypotheses.size();
-    hash += 23 * phrases.size();
-    hash += 57 * span.hashCode();
-    hash += 117 * Arrays.hashCode(ranks);
-    return hash;
-  }
-
-  @Override
-  public String toString() {
-    return String.format("CANDIDATE(hyp %d/%d, phr %d/%d) [%s] phrase=[%s] span=%s",
-        ranks[0], hypotheses.size(),
-        ranks[1], phrases.size(),
-        getHypothesis(),
-        getRule().getEnglishWords().replaceAll("\\[.*?\\] ",""),
-        getSpan());
-  }
-
-  /**
-   * Extends the cube pruning dot in both directions and returns the resulting pair. Either
-   * element can be null if the end of its list has been reached.
-   *
-   * @return { next along the hypotheses, next along the phrases }, each possibly null
-   */
-  public Candidate[] extend() {
-    final Candidate alongHypotheses = extendHypothesis();
-    final Candidate alongPhrases = extendPhrase();
-    return new Candidate[] { alongHypotheses, alongPhrases };
-  }
-
-  /**
-   * Extends the cube pruning dot along the dimension of existing hypotheses.
-   *
-   * @return the next candidate, or null if none
-   */
-  public Candidate extendHypothesis() {
-    if (ranks[0] >= hypotheses.size() - 1) {
-      return null;
-    }
-    return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0] + 1, ranks[1] });
-  }
-
-  /**
-   * Extends the cube pruning dot along the dimension of candidate target sides.
-   *
-   * @return the next candidate, or null if none
-   */
-  public Candidate extendPhrase() {
-    if (ranks[1] >= phrases.size() - 1) {
-      return null;
-    }
-    return new Candidate(hypotheses, phrases, span, future_delta, new int[] { ranks[0], ranks[1] + 1 });
-  }
-
-  /**
-   * Returns the input span from which the phrases for this candidate were gathered.
-   *
-   * @return the span object
-   */
-  public Span getSpan() {
-    return span;
-  }
-
-  /**
-   * A candidate is a (hypothesis, target phrase) pairing, drawn from the two lists at
-   * positions (ranks[0], ranks[1]). This is the shortcut for the hypothesis half of the pair.
-   *
-   * @return the hypothesis at position ranks[0]
-   */
-  public Hypothesis getHypothesis() {
-    return hypotheses.get(ranks[0]);
-  }
-
-  /**
-   * Convenience accessor for the target-side phrase of the pair, which is a {@link Rule}.
-   *
-   * @return the phrase at position ranks[1]
-   */
-  public Rule getRule() {
-    return this.phrases.get(ranks[1]);
-  }
-
-  /**
-   * The hypotheses list is a list of tail pointers. This returns the tail pointer currently
-   * selected by the value in ranks.
-   *
-   * @return a new list of size one, wrapping the tail node pointer
-   */
-  public List<HGNode> getTailNodes() {
-    final List<HGNode> tails = new ArrayList<HGNode>();
-    tails.add(getHypothesis());
-    return tails;
-  }
-
-  /**
-   * Returns the coverage of this candidate: a copy of the tail hypothesis's coverage vector
-   * with the source span of this candidate switched on as well.
-   *
-   * @return a new Coverage object
-   */
-  public Coverage getCoverage() {
-    final Coverage combined = new Coverage(getHypothesis().getCoverage());
-    combined.set(getSpan());
-    return combined;
-  }
-
-  /**
-   * Sets the result of a candidate (should just be moved to the constructor).
-   *
-   * @param result the scoring and state information for this candidate
-   */
-  public void setResult(ComputeNodeResult result) {
-    this.result = result;
-  }
-
-  /**
-   * Returns the sum of two costs: the HypoState cost + the transition cost. The HypoState cost
-   * is itself the score of the underlying hypothesis plus the adjustment to the future score
-   * incurred by translating the words under the source phrase being added. The transition cost
-   * is the sum of the new features incurred along the transition (mostly, the language model
-   * costs).
-   *
-   * The future cost item should probably just be implemented as another kind of feature
-   * function, but it would require some reworking of that interface, which isn't worth it.
-   *
-   * @return hypothesis score + future delta + transition cost
-   */
-  public float score() {
-    final float hypoStateCost = getHypothesis().getScore() + future_delta;
-    return hypoStateCost + result.getTransitionCost();
-  }
-
-  /**
-   * @return hypothesis score + future delta, without the transition cost
-   */
-  public float getFutureEstimate() {
-    final float hypothesisScore = getHypothesis().getScore();
-    return hypothesisScore + future_delta;
-  }
-
-  /**
-   * @return the dynamic programming states held by the result set via setResult()
-   */
-  public List<DPState> getStates() {
-    return this.result.getDPStates();
-  }
-
-  /**
-   * @return the result set via setResult(), or null if none has been set
-   */
-  public ComputeNodeResult getResult() {
-    return this.result;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/CandidateComparator.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/CandidateComparator.java b/src/joshua/decoder/phrase/CandidateComparator.java
deleted file mode 100644
index 2526ed6..0000000
--- a/src/joshua/decoder/phrase/CandidateComparator.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.Comparator;
-
-/** Orders candidates by descending score(), so a higher-scoring candidate sorts first. */
-public class CandidateComparator implements Comparator<Candidate> {
-  @Override
-  public int compare(Candidate one, Candidate another) {
-    // The arguments are deliberately reversed to obtain descending order.
-    final float theirs = another.score();
-    final float ours = one.score();
-    return Float.compare(theirs, ours);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Coverage.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Coverage.java b/src/joshua/decoder/phrase/Coverage.java
deleted file mode 100644
index 398c7a0..0000000
--- a/src/joshua/decoder/phrase/Coverage.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.BitSet;
-
-import joshua.corpus.Span;
-
-/**
- * Represents a coverage vector. The vector is relative to a hypothesis. {firstZero} denotes the
- * first uncovered word of the sentence, and {bits} contains the coverage vector of all the words
- * after it, with the first zero removed.
- */
-
-public class Coverage {
-
-  // The index of the first uncovered word
-  private int firstZero;
-
-  // Bits with the first zero removed.
-  // We also assume anything beyond this is zero due to the reordering window.
-  // Lowest bits correspond to next word.
-  private BitSet bits;
-
-  // Default bit vector length; also the minimum number of positions printed by toString()
-  private static final int INITIAL_LENGTH = 10;
-
-  /**
-   * Creates an empty coverage vector whose first uncovered word is word 0.
-   */
-  public Coverage() {
-    firstZero = 0;
-    bits = new BitSet(INITIAL_LENGTH);
-  }
-
-  /**
-   * Creates a coverage vector with no bits set after the given first uncovered word.
-   *
-   * @param firstZero the index of the first uncovered word
-   */
-  public Coverage(int firstZero) {
-    this.firstZero = firstZero;
-    bits = new BitSet(INITIAL_LENGTH);
-  }
-
-  /**
-   * Initializes a coverage vector from another one, creating a separate object whose bits
-   * are a clone of the other's.
-   *
-   * @param other the coverage vector to copy
-   */
-  public Coverage(Coverage other) {
-    this.firstZero = other.firstZero;
-    this.bits = (BitSet) other.bits.clone();
-  }
-
-  /**
-   * Pretty-prints the coverage vector as firstZero followed by one character per relative
-   * position ("x" covered, "." uncovered). At least INITIAL_LENGTH positions are shown, and
-   * more if a higher bit is set.
-   */
-  @Override
-  public String toString() {
-    StringBuilder sb = new StringBuilder();
-    sb.append(String.format("%d ", firstZero));
-
-    for (int i = 0; i < Math.max(INITIAL_LENGTH, bits.length()); i++) {
-      sb.append(bits.get(i) ? "x" : ".");
-    }
-
-    return sb.toString();
-  }
-
-  /**
-   * Turns on all bits from position begin to position (end - 1), that is, in the range
-   * [begin .. end). Both indices are absolute; they are stored relative to the current
-   * coverage vector, which may not start at 0.
-   *
-   * @param begin the begin index (absolute, inclusive)
-   * @param end the end index (absolute, exclusive)
-   */
-  public void set(int begin, int end) {
-    assert compatible(begin, end);
-
-    if (begin == firstZero) {
-      // A concatenation: the span starts exactly at the first uncovered word, so the
-      // vector is re-based at the end of the span.
-      firstZero = end;
-      bits = bits.get(end - begin, Math.max(end - begin, bits.length()));
-      int firstClear = bits.nextClearBit(0);
-      if (firstClear != 0) {
-        // We might have exactly covered a gap, in which case already-covered words follow
-        // directly; shift firstZero and the bits past them.
-        firstZero += firstClear;
-        bits = bits.get(firstClear, bits.length());
-      }
-    } else {
-      // Set the bits relative to the current firstZero
-      bits.or(pattern(begin, end));
-    }
-  }
-
-  /**
-   * Convenience function: sets the range [span.start .. span.end).
-   */
-  public final void set(Span span) {
-    set(span.start, span.end);
-  }
-
-  /**
-   * Tests whether a new range is compatible with the current coverage vector. It must not
-   * begin before the first uncovered word, and must not overlap any span already covered
-   * after the first uncovered word.
-   *
-   * @param begin the begin index (absolute)
-   * @param end the end index (absolute)
-   * @return true if the span is compatible with the coverage vector
-   */
-  public boolean compatible(int begin, int end) {
-    if (begin >= firstZero) {
-      BitSet pattern = new BitSet();
-      pattern.set(begin - firstZero, end - firstZero);
-      return ! bits.intersects(pattern);
-    }
-    return false;
-  }
-
-  /**
-   * Returns the source sentence index of the first uncovered word.
-   *
-   * @return the index
-   */
-  public int firstZero() {
-    return firstZero;
-  }
-
-  /**
-   * leftOpening() and rightOpening() find the larger gap in which a new source phrase pair
-   * sits. When using a phrase pair covering (begin, end), the pair
-   *
-   *     (leftOpening(begin), rightOpening(end, sentence_length))
-   *
-   * provides this gap.
-   *
-   * Finds the left bound of the gap in which the phrase [begin, ...) sits.
-   *
-   * @param begin the start index of the phrase being applied (absolute)
-   * @return the absolute index just past the nearest covered word at or before begin, or
-   *         firstZero if there is none
-   */
-  public int leftOpening(int begin) {
-    // Relative position 0 is never inspected here.
-    for (int i = begin - firstZero; i > 0; --i) {
-      if (bits.get(i)) {
-        assert compatible(i + firstZero + 1, begin);
-        assert !compatible(i + firstZero, begin);
-        return i + firstZero + 1;
-      }
-    }
-
-    assert compatible(firstZero, begin);
-    return firstZero;
-  }
-
-  /**
-   * leftOpening() and rightOpening() find the larger gap in which a new source phrase pair
-   * sits. When using a phrase pair covering (begin, end), the pair
-   *
-   *     (leftOpening(begin), rightOpening(end, sentence_length))
-   *
-   * provides this gap.
-   *
-   * Finds the right bound of the enclosing gap, or the end of sentence, whichever is less.
-   * Only relative positions below 64 are scanned.
-   *
-   * @param end the end index of the phrase being applied (absolute)
-   * @param sentenceLength the length of the sentence
-   * @return the absolute index of the first covered word at or after end, or sentenceLength
-   *         if none is found
-   */
-  public int rightOpening(int end, int sentenceLength) {
-    for (int i = end - firstZero; i < Math.min(64, sentenceLength - firstZero); i++) {
-      if (bits.get(i)) {
-        return i + firstZero;
-      }
-    }
-    return sentenceLength;
-  }
-
-  /**
-   * Creates a bit vector with the same offset as the current coverage vector, flipping on
-   * bits begin..end.
-   *
-   * @param begin the begin index (absolute)
-   * @param end the end index (absolute)
-   * @return a bit vector (relative) with positions [begin..end) on
-   */
-  public BitSet pattern(int begin, int end) {
-    assert begin >= firstZero;
-    BitSet pattern = new BitSet(INITIAL_LENGTH);
-    pattern.set(begin - firstZero, end - firstZero);
-    return pattern;
-  }
-
-  /**
-   * Returns the underlying coverage bits (the live object, not a copy).
-   *
-   * @return the bits, relative to firstZero
-   */
-  public BitSet getCoverage() {
-    return bits;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (obj instanceof Coverage) {
-      Coverage other = (Coverage) obj;
-      return getCoverage().equals(other.getCoverage()) && firstZero() == other.firstZero();
-    }
-
-    return false;
-  }
-
-  /**
-   * Combines the hash of the bits with firstZero. The two are added rather than multiplied
-   * so that vectors whose first uncovered word is 0 do not all hash to 0.
-   */
-  @Override
-  public int hashCode() {
-    return 31 * getCoverage().hashCode() + firstZero();
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/CoverageTest.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/CoverageTest.java b/src/joshua/decoder/phrase/CoverageTest.java
deleted file mode 100644
index 90bcbaf..0000000
--- a/src/joshua/decoder/phrase/CoverageTest.java
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import static org.junit.Assert.*;
-
-import java.util.BitSet;
-
-import org.junit.Test;
-
-/**
- * Unit tests for {@link Coverage}. All assertEquals calls use JUnit's
- * (expected, actual) argument order so that failure messages read correctly.
- */
-public class CoverageTest {
-
-  /** Spans set out of order merge into one covered prefix. */
-  @Test
-  public void testSet() {
-    Coverage cov = new Coverage();
-    cov.set(1,2);
-    cov.set(3,4);
-    cov.set(2,3);
-    cov.set(0,1);
-
-    assertFalse(cov.compatible(0, 1));
-    assertFalse(cov.compatible(0, 5));
-    assertTrue(cov.compatible(4, 6));
-
-    assertEquals("4 ..........", cov.toString());
-  }
-
-  /** pattern() is expressed relative to the first uncovered word. */
-  @Test
-  public void testPattern() {
-    Coverage cov = new Coverage();
-    cov.set(5,6);
-    cov.set(0,4);
-    BitSet bits = cov.pattern(4, 5);
-    BitSet answerBits = new BitSet();
-    answerBits.set(0);
-    assertEquals(answerBits, bits);
-  }
-
-  /** Changing a copy must not affect the original. */
-  @Test
-  public void testCopyConstructor() {
-    Coverage a = new Coverage();
-    a.set(2,3);
-    Coverage b = new Coverage(a);
-    b.set(4,5);
-
-    assertFalse(a.toString().equals(b.toString()));
-  }
-
-  @Test
-  public void testCompatible() {
-    Coverage a = new Coverage();
-    a.set(10, 14);
-
-    assertTrue(a.compatible(14, 16));
-    assertTrue(a.compatible(6, 10));
-    assertTrue(a.compatible(1, 10));
-    assertTrue(a.compatible(1, 9));
-    assertFalse(a.compatible(9, 11));
-    assertFalse(a.compatible(13, 15));
-    assertFalse(a.compatible(9, 15));
-    assertFalse(a.compatible(9, 14));
-    assertFalse(a.compatible(10, 15));
-
-    a.set(0,9);
-
-    // Only the one-word gap at 9 and everything from 14 onwards is still free.
-    for (int width = 1; width <= 3; width++) {
-      for (int i = 0; i < 20; i++) {
-        int j = i + width;
-        if ((i == 9 && j == 10) || i >= 14)
-          assertTrue(a.compatible(i,j));
-        else {
-          assertFalse(a.compatible(i,j));
-        }
-      }
-    }
-  }
-
-  /** firstZero only advances once the word it points at has been covered. */
-  @Test
-  public void testFirstZero() {
-    Coverage cov = new Coverage();
-    cov.set(2, 5);
-    assertEquals(0, cov.firstZero());
-    cov.set(8,10);
-    assertEquals(0, cov.firstZero());
-    cov.set(0, 2);
-    assertEquals(5, cov.firstZero());
-    cov.set(5, 7);
-    assertEquals(7, cov.firstZero());
-    cov.set(7,8);
-    assertEquals(10, cov.firstZero());
-  }
-
-  @Test
-  public void testOpenings() {
-    Coverage cov = new Coverage();
-    cov.set(0, 2);
-    cov.set(8, 10);
-
-    for (int i = 2; i < 7; i++) {
-      assertEquals(2, cov.leftOpening(i));
-      assertEquals(8, cov.rightOpening(i, 17));
-      assertEquals(7, cov.rightOpening(i, 7));
-    }
-  }
-
-  /** The same words covered in one step or in two steps compare equal. */
-  @Test
-  public void testEquals() {
-    Coverage cov = new Coverage();
-    cov.set(9, 11);
-    Coverage cov2 = new Coverage();
-    cov2.set(9,10);
-    cov2.set(10,11);
-    assertEquals(cov, cov2);
-  }
-
-  @Test
-  public void testToString() {
-    Coverage cov = new Coverage();
-    cov.set(0, 40);
-    cov.set(44, 49);
-    assertEquals("40 ....xxxxx.", cov.toString());
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Future.java b/src/joshua/decoder/phrase/Future.java
deleted file mode 100644
index 22a0225..0000000
--- a/src/joshua/decoder/phrase/Future.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-/***
- * This class represents the future cost of a hypothesis. The future cost of a hypothesis is the
- * cost of covering all uncovered words. The way this is computed is with a simple dynamic program
- * that computes, for each span of the input, the best possible way to cover that span with
- * phrases from the phrase table. No non-local features (e.g., the language model cost) are used
- * in computing this estimate.
- */
-
-import joshua.decoder.Decoder;
-import joshua.util.ChartSpan;
-
-public class Future {
-
-  // Square matrix with half the values ignored.
-  private ChartSpan<Float> entries;
-
-  private int sentlen;
-
-  /**
-   * Computes bottom-up the best way to cover all spans of the input sentence, using the phrases
-   * that have been assembled in a {@link PhraseChart}. Requires that there be a translation at
-   * least for every word (which can be accomplished with a pass-through grammar).
-   *
-   * @param chart the phrase chart holding the target phrases for each source span
-   */
-  public Future(PhraseChart chart) {
-
-    sentlen = chart.SentenceLength();
-    // One extra slot so that spans ending at index sentlen can be stored.
-    entries = new ChartSpan<Float>(sentlen + 1, Float.NEGATIVE_INFINITY);
-
-    /*
-     * The sentence is represented as a sequence of words, with the first and last words set
-     * to <s> and </s>. We start indexing at 1 because the first word (<s>) is always covered.
-     */
-    for (int begin = 1; begin <= chart.SentenceLength(); begin++) {
-      // Nothing is nothing (this is a useful concept when two phrases abut)
-      setEntry(begin, begin, 0.0f);
-      // Insert phrases
-      int max_end = Math.min(begin + chart.MaxSourcePhraseLength(), chart.SentenceLength());
-      for (int end = begin + 1; end <= max_end; end++) {
-
-        // Moses doesn't include the cost of applying </s>, so force it to zero
-        if (begin == sentlen - 1 && end == sentlen)
-          setEntry(begin, end, 0.0f);
-        else {
-          TargetPhrases phrases = chart.getRange(begin, end);
-          if (phrases != null)
-            setEntry(begin, end, phrases.get(0).getEstimatedCost());
-        }
-      }
-    }
-
-    // All the phrases are in, now do the dynamic programming: each span keeps the larger of
-    // its own entry and the best sum over its two-way splits. Lengths 0 and 1 were already
-    // handled above.
-    for (int length = 2; length <= chart.SentenceLength(); length++) {
-      for (int begin = 1; begin <= chart.SentenceLength() - length; begin++) {
-        for (int division = begin + 1; division < begin + length; division++) {
-          setEntry(begin, begin + length, Math.max(getEntry(begin, begin + length), getEntry(begin, division) + getEntry(division, begin + length)));
-        }
-      }
-    }
-
-    if (Decoder.VERBOSE >= 3) {
-      for (int i = 1; i < chart.SentenceLength(); i++)
-        for (int j = i + 1; j < chart.SentenceLength(); j++)
-          System.err.println(String.format("future cost from %d to %d is %.3f", i-1, j-2, getEntry(i, j)));
-    }
-  }
-
-  /**
-   * Returns the entry for the span from index 1 to the sentence length.
-   */
-  public float Full() {
-    return getEntry(1, sentlen);
-  }
-
-  /**
-   * Calculate change in rest cost when the given coverage is to be covered.
-   *
-   * @param coverage the coverage vector the span [begin, end) is being added to
-   * @param begin the begin index of the span being covered
-   * @param end the end index of the span being covered
-   * @return entry(left, begin) + entry(end, right) - entry(left, right), where (left, right)
-   *         is the gap around the span as reported by the coverage vector
-   */
-  public float Change(Coverage coverage, int begin, int end) {
-    int left = coverage.leftOpening(begin);
-    int right = coverage.rightOpening(end, sentlen);
-    return getEntry(left, begin) + getEntry(end, right) - getEntry(left, right);
-  }
-
-  /**
-   * Reads one chart entry. The end index may equal the sentence length: the constructor
-   * stores such spans and Full() reads one, so the bound check is inclusive.
-   */
-  private float getEntry(int begin, int end) {
-    assert end >= begin;
-    assert end <= this.sentlen;
-    return entries.get(begin, end);
-  }
-
-  /**
-   * Writes one chart entry. As in getEntry(), end may equal the sentence length.
-   */
-  private void setEntry(int begin, int end, float value) {
-    assert end >= begin;
-    assert end <= this.sentlen;
-    entries.set(begin, end, value);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Header.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Header.java b/src/joshua/decoder/phrase/Header.java
deleted file mode 100644
index 2a8370d..0000000
--- a/src/joshua/decoder/phrase/Header.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-// PORT: done
-
-import java.util.Comparator;
-
-/**
- * A score, an arity and an optional {@link Note}. Headers are ordered by ascending score, both
- * through {@link Comparable} and through {@link Comparator}.
- */
-public class Header implements Comparable<Header>, Comparator<Header> {
-  private float score;
-  private int arity;
-  private Note note;
-
-  /** Creates a header with score 0, arity 0 and no note, so {@link #Valid()} returns false. */
-  protected Header() {
-    score = 0.0f;
-    arity = 0;
-    note = null;
-  }
-
-  /**
-   * Copies score, arity and note from another header. The note object itself is shared with
-   * {@code other}, not duplicated.
-   *
-   * @param other the header to copy from
-   */
-  protected Header(Header other) {
-    this.score = other.GetScore();
-    this.arity = other.GetArity();
-    this.note = other.GetNote();
-  }
-
-  /**
-   * Creates a header with score 0, the given arity, and a fresh, empty note.
-   *
-   * @param arity the arity to record
-   */
-  protected Header(int arity) {
-    this.score = 0.0f;
-    this.arity = arity;
-    this.note = new Note();
-  }
-
-  /**
-   * Reports whether this header carries a note.
-   *
-   * @return true if the note is non-null
-   */
-  public boolean Valid() {
-    // C++: return base_;
-    return note != null;
-  }
-
-  public float GetScore() {
-    return score;
-  }
-
-  public void SetScore(float score) {
-    this.score = score;
-  }
-
-  public int GetArity() {
-    return arity;
-  }
-
-  public Note GetNote() {
-    return note;
-  }
-
-  public void SetNote(Note note) {
-    this.note = note;
-  }
-
-  /**
-   * Orders headers by ascending score.
-   *
-   * Float.compare is used instead of the relational operators so that the ordering stays total
-   * when a score is NaN (NaN sorts last, and -0.0f sorts before 0.0f).
-   *
-   * @param other the header to compare against
-   * @return a negative value, zero, or a positive value as this score is less than, equal to,
-   *         or greater than the other score
-   */
-  @Override
-  public int compareTo(Header other) {
-    return Float.compare(this.GetScore(), other.GetScore());
-  }
-
-  /**
-   * Comparator form of {@link #compareTo(Header)}.
-   *
-   * @param arg0 left operand
-   * @param arg1 right operand
-   * @return the result of {@code arg0.compareTo(arg1)}
-   */
-  @Override
-  public int compare(Header arg0, Header arg1) {
-    return arg0.compareTo(arg1);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Hypothesis.java b/src/joshua/decoder/phrase/Hypothesis.java
deleted file mode 100644
index 3d4bf51..0000000
--- a/src/joshua/decoder/phrase/Hypothesis.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.List;
-
-import joshua.corpus.Vocabulary;
-import joshua.decoder.ff.state_maintenance.DPState;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.format.HieroFormatReader;
-import joshua.decoder.hypergraph.HGNode;
-import joshua.decoder.hypergraph.HyperEdge;
-
-/**
- * Represents a hypothesis, a translation of some coverage of the input. Extends {@link HGNode},
- * through a bit of a hack. Whereas (i,j) represents the span of an {@link HGNode}, i here is not used,
- * and j is overloaded to denote the span of the phrase being applied. The complete coverage vector
- * can be obtained by looking at the tail pointer and casting it.
- *
- * @author Kenneth Heafield
- * @author Matt Post <po...@cs.jhu.edu>
- */
-public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
-
-  // The hypothesis' coverage vector
-  private Coverage coverage;
-
-  public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
-  public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
-
-  /**
-   * Renders the coverage, the last source index, the English side of the best hyperedge's rule,
-   * and the concatenated DP states.
-   */
-  @Override
-  public String toString() {
-    // Purely local buffer, so the unsynchronized StringBuilder is sufficient.
-    StringBuilder sb = new StringBuilder();
-    for (DPState state : getDPStates())
-      sb.append(state);
-    String words = bestHyperedge.getRule().getEnglishWords();
-    return String.format("HYP[%s] j=%d words=[%s] state=%s", coverage, j, words, sb);
-  }
-
-  /**
-   * Initialize root hypothesis. Provide the LM's BeginSentence.
-   *
-   * @param states the DP states of the root hypothesis
-   * @param futureCost the value handed to the superclass as the node's score
-   */
-  public Hypothesis(List<DPState> states, float futureCost) {
-    super(0, 1, Vocabulary.id("[X]"), states,
-        new HyperEdge(BEGIN_RULE, 0.0f, 0.0f, null, null), futureCost);
-    this.coverage = new Coverage(1);
-  }
-
-  /**
-   * Builds a hypothesis from a candidate, taking its span end, states, rule, costs, tail nodes,
-   * score and coverage.
-   *
-   * @param cand the candidate to convert
-   */
-  public Hypothesis(Candidate cand) {
-    // TODO: sourcepath
-    super(-1, cand.span.end, Vocabulary.id("[X]"), cand.getStates(), new HyperEdge(
-        cand.getRule(), cand.getResult().getViterbiCost(), cand.getResult().getTransitionCost(),
-        cand.getTailNodes(), null), cand.score());
-    this.coverage = cand.getCoverage();
-  }
-
-  /**
-   * Extend a previous hypothesis. The new hypothesis shares the previous one's coverage object.
-   * NOTE(review): {@code states} and {@code target} are accepted but not used here.
-   *
-   * @param states unused
-   * @param score the score handed to the superclass
-   * @param previous the hypothesis being extended
-   * @param source_end the last source index, stored as j
-   * @param target unused
-   */
-  public Hypothesis(List<DPState> states, float score, Hypothesis previous, int source_end, Rule target) {
-    super(-1, source_end, -1, null, null, score);
-    this.coverage = previous.coverage;
-  }
-
-  public Coverage getCoverage() {
-    return coverage;
-  }
-
-  public Rule getRule() {
-    return bestHyperedge.getRule();
-  }
-
-  /**
-   * HGNodes (designed for chart parsing) maintain a span (i,j). We overload j
-   * here to record the index of the last translated source word.
-   *
-   * @return the value of j
-   */
-  public int LastSourceIndex() {
-    return j;
-  }
-
-  /**
-   * Hashes the same components that {@link #equals(Object)} compares: the last source index,
-   * the coverage, and every DP state.
-   */
-  @Override
-  public int hashCode() {
-    int hash = 31 * LastSourceIndex() + 19 * getCoverage().hashCode();
-    if (null != dpStates) {
-      // Fold each state in as hash * 57 + h. The earlier "hash *= 57 + h" multiplied by
-      // (57 + h), so a single state whose hash was -57 wiped the value out to zero.
-      for (DPState dps : dpStates)
-        hash = 57 * hash + dps.hashCode();
-    }
-    return hash;
-  }
-
-  /**
-   * Defines equivalence in terms of recombinability. Two hypotheses are recombinable if
-   * all their DP states are the same, their coverage is the same, and they have the same last
-   * source index.
-   */
-  @Override
-  public boolean equals(Object obj) {
-    if (!(obj instanceof Hypothesis))
-      return false;
-
-    Hypothesis other = (Hypothesis) obj;
-
-    if (LastSourceIndex() != other.LastSourceIndex() || !getCoverage().equals(other.getCoverage()))
-      return false;
-
-    if (dpStates == null)
-      return (other.dpStates == null);
-
-    if (other.dpStates == null)
-      return false;
-
-    if (dpStates.size() != other.dpStates.size())
-      return false;
-
-    for (int i = 0; i < dpStates.size(); i++) {
-      if (!dpStates.get(i).equals(other.dpStates.get(i)))
-        return false;
-    }
-
-    return true;
-  }
-
-  /**
-   * Orders hypotheses by descending score: the one with the higher score compares as smaller.
-   */
-  @Override
-  public int compareTo(Hypothesis o) {
-    // TODO: is this the order we want?
-    return Float.compare(o.getScore(), getScore());
-  }
-
-  /**
-   * Performs hypothesis recombination, incorporating the incoming hyperedges of the added
-   * hypothesis and possibly updating the cache of the best incoming hyperedge and score.
-   *
-   * @param added the equivalent hypothesis
-   */
-  public void absorb(Hypothesis added) {
-    assert(this.equals(added));
-    score = Math.max(score, added.getScore());
-    addHyperedgesInNode(added.hyperedges);
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/Note.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/Note.java b/src/joshua/decoder/phrase/Note.java
deleted file mode 100644
index 19e6f62..0000000
--- a/src/joshua/decoder/phrase/Note.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-// PORT: done
-
-/**
- * A mutable holder for a single, possibly null, object.
- */
-public class Note {
-  public Object value;
-
-  /**
-   * Returns the string form of the held value, or the text "null" when nothing is held
-   * (for instance right after the no-argument constructor).
-   */
-  @Override
-  public String toString() {
-    // String.valueOf tolerates null, whereas value.toString() would throw.
-    return String.valueOf(value);
-  }
-
-  /** Creates a note that holds nothing yet. */
-  public Note() {
-  }
-
-  /**
-   * Creates a note holding the given value.
-   *
-   * @param value the object to hold; may be null
-   */
-  public Note(Object value) {
-    this.value = value;
-  }
-
-  /**
-   * @return the held object, or null if none has been set
-   */
-  public Object get() {
-    return value;
-  }
-
-  /**
-   * Replaces the held object.
-   *
-   * @param object the new object to hold; may be null
-   */
-  public void set(Object object) {
-    this.value = object;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8cdbc4b8/src/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/phrase/PhraseChart.java b/src/joshua/decoder/phrase/PhraseChart.java
deleted file mode 100644
index a0179ff..0000000
--- a/src/joshua/decoder/phrase/PhraseChart.java
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package joshua.decoder.phrase;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import joshua.decoder.Decoder;
-import joshua.decoder.ff.FeatureFunction;
-import joshua.decoder.ff.tm.Rule;
-import joshua.decoder.ff.tm.RuleCollection;
-import joshua.decoder.segment_file.Sentence;
-
-/**
- * Holds, for one input sentence, the target phrases that the phrase tables supply for each
- * source span, up to the longest source phrase length reported by any of the tables.
- */
-public class PhraseChart {
-
-  private int sentence_length;
-  private int max_source_phrase_length;
-
-  // Banded array: different source lengths are next to each other.
-  private List<TargetPhrases> entries;
-
-  // number of translation options
-  int numOptions = 20;
-  private List<FeatureFunction> features;
-
-  /**
-   * Create a new PhraseChart object, which represents all phrases that are
-   * applicable against the current input sentence. These phrases are extracted
-   * from all available grammars.
-   *
-   * @param tables the phrase tables to query
-   * @param features the feature functions used when sorting and finishing the phrases
-   * @param source the input sentence
-   * @param num_options the number of translation options to keep per span; values of zero or
-   *        less leave the sorted rule lists untrimmed in {@code addToRange}
-   */
-  public PhraseChart(PhraseTable[] tables, List<FeatureFunction> features, Sentence source,
-      int num_options) {
-
-    // Keep the timestamp as a long. Stored in a float it is rounded to the nearest
-    // representable value, which for current epoch milliseconds is minutes apart, so the
-    // elapsed time reported below would be meaningless.
-    long startTime = System.currentTimeMillis();
-
-    this.numOptions = num_options;
-    this.features = features;
-
-    max_source_phrase_length = 0;
-    for (int i = 0; i < tables.length; i++)
-      max_source_phrase_length = Math.max(max_source_phrase_length,
-          tables[i].getMaxSourcePhraseLength());
-    sentence_length = source.length();
-
-    // One slot per (begin, phrase length) pair; slots stay null until phrases are added.
-    entries = new ArrayList<TargetPhrases>();
-    for (int i = 0; i < sentence_length * max_source_phrase_length; i++)
-      entries.add(null);
-
-    // There's some unreachable ranges off the edge. Meh.
-    for (int begin = 0; begin != sentence_length; ++begin) {
-      for (int end = begin + 1; (end != sentence_length + 1)
-          && (end <= begin + max_source_phrase_length); ++end) {
-        if (source.hasPath(begin, end)) {
-          for (PhraseTable table : tables)
-            addToRange(begin, end,
-                table.getPhrases(Arrays.copyOfRange(source.getWordIDs(), begin, end)));
-        }
-
-      }
-    }
-
-    for (TargetPhrases phrases : entries) {
-      if (phrases != null)
-        phrases.finish(features, Decoder.weights, num_options);
-    }
-
-    Decoder.LOG(1, String.format("Input %d: Collecting options took %.3f seconds", source.id(),
-        (System.currentTimeMillis() - startTime) / 1000.0f));
-
-    if (Decoder.VERBOSE(3)) {
-      for (int i = 1; i < sentence_length - 1; i++) {
-        for (int j = i + 1; j < sentence_length && j <= i + max_source_phrase_length; j++) {
-          if (source.hasPath(i, j)) {
-            TargetPhrases phrases = getRange(i, j);
-            if (phrases != null) {
-              System.err.println(String.format("%s (%d-%d)", source.source(i,j), i, j));
-              for (Rule rule: phrases)
-                System.err.println(String.format(" %s :: est=%.3f", rule.getEnglishWords(), rule.getEstimatedCost()));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * @return the length of the input sentence, as reported by the sentence at construction time
-   */
-  public int SentenceLength() {
-    return sentence_length;
-  }
-
-  // c++: TODO: make this reflect the longest source phrase for this sentence.
-  /**
-   * @return the largest maximum source phrase length reported by the phrase tables
-   */
-  public int MaxSourcePhraseLength() {
-    return max_source_phrase_length;
-  }
-
-  /**
-   * Maps two-dimensional span into a one-dimensional array.
-   *
-   * @param i start of the span
-   * @param j end of the span
-   * @return offset into private list of TargetPhrases
-   */
-  private int offset(int i, int j) {
-    return i * max_source_phrase_length + j - i - 1;
-  }
-
-  /**
-   * Returns phrases from all grammars that match the span.
-   *
-   * @param begin start of the span
-   * @param end end of the span
-   * @return the phrases stored for the span, or null if the span maps outside the chart or
-   *         nothing was stored for it
-   */
-  public TargetPhrases getRange(int begin, int end) {
-    int index = offset(begin, end);
-
-    if (index < 0 || index >= entries.size())
-      return null;
-
-    // An empty slot holds null, which is also the "nothing here" answer.
-    return entries.get(index);
-  }
-
-  /**
-   * Add a set of phrases from a grammar to the current span.
-   *
-   * @param begin start of the span
-   * @param end end of the span
-   * @param to the rules to add; a null collection is ignored
-   */
-  private void addToRange(int begin, int end, RuleCollection to) {
-    if (to != null) {
-      /*
-       * This first call to getSortedRules() is important, because it is what
-       * causes the scoring and sorting to happen. It is also a synchronized call,
-       * which is necessary because the underlying grammar gets sorted. Subsequent calls to get the
-       * rules will just return the already-sorted list. Here, we score, sort,
-       * and then trim the list to the number of translation options. Trimming provides huge
-       * performance gains --- the more common the word, the more translations options it is
-       * likely to have (often into the tens of thousands).
-       */
-      List<Rule> rules = to.getSortedRules(features);
-      if (numOptions > 0 && rules.size() > numOptions)
-        rules = rules.subList(0, numOptions);
-
-      try {
-        int offset = offset(begin, end);
-        if (entries.get(offset) == null)
-          entries.set(offset, new TargetPhrases(rules));
-        else
-          entries.get(offset).addAll(rules);
-      } catch (java.lang.IndexOutOfBoundsException e) {
-        // Deliberately best-effort: a span that does not fit the chart is reported and skipped.
-        System.err.println(String.format("Whoops! %s [%d-%d] too long (%d)", to, begin, end,
-            entries.size()));
-      }
-    }
-  }
-}