You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/17 12:27:55 UTC
[06/14] incubator-joshua git commit: Joshua 7 configuration system
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index 2ac5269..d00bc4d 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -18,73 +18,124 @@
*/
package org.apache.joshua.decoder;
+import static com.google.common.base.Preconditions.checkState;
+
import java.io.BufferedReader;
+import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
+import java.util.HashMap;
+import java.util.Map;
-import org.apache.joshua.decoder.JoshuaConfiguration.SERVER_TYPE;
import org.apache.joshua.decoder.io.TranslationRequestStream;
import org.apache.joshua.server.ServerThread;
import org.apache.joshua.server.TcpServer;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import org.kohsuke.args4j.spi.MapOptionHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.base.Throwables;
import com.sun.net.httpserver.HttpServer;
+import com.typesafe.config.Config;
+import com.typesafe.config.ConfigFactory;
+import com.typesafe.config.ConfigParseOptions;
+import com.typesafe.config.ConfigRenderOptions;
/**
- * Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
- * and <code>DecoderTask</code>.
+ * Command-line tool for the Joshua Decoder.
*
* @author Zhifei Li, zhifei.work@gmail.com
* @author wren ng thornton wren@users.sourceforge.net
* @author Lane Schwartz dowobeha@users.sourceforge.net
+ * @author Felix Hieber felix.hieber@gmail.com
*/
public class JoshuaDecoder {
private static final Logger LOG = LoggerFactory.getLogger(JoshuaDecoder.class);
+
+ @Option(name="--decoderConfig", aliases={"-c"}, metaVar="DECODER.CFG", required=false, usage="configuration file for the decoder (i.e., joshua.config")
+ private File configFile = null;
+
+ @Option(name = "-C", handler=MapOptionHandler.class, metaVar = "<property>=<value>", usage = "use value for given key to override flags in the config file, i.e., -C top_n=4", required=false)
+ private Map<String, String> overrides = new HashMap<>();
+
+ @Option(name="--verbose", aliases={"-v"}, required=false, usage="log level of the decoder")
+ private String logLevel = Level.INFO.toString();
+
+ @Option(name="--help", aliases={"-h"}, required=false, usage="show configuration options and quit.")
+ private boolean help = false;
+
+ /**
+ * Returns the flags composed of default config, given config, and commandline overrides.
+ */
+ private Config getFlags() {
+ final ConfigParseOptions options = ConfigParseOptions.defaults().setAllowMissing(false);
+ final Config defaultConfig = Decoder.getDefaultFlags();
+ Config givenConfig = ConfigFactory.empty();
+ if (configFile != null) {
+ givenConfig = ConfigFactory.parseFile(configFile, options).resolveWith(defaultConfig);
+ LOG.info("Config: {}", configFile.toString());
+ }
+ final Config config = ConfigFactory.parseMap(overrides, "CmdLine overrides")
+ .resolve()
+ .withFallback(givenConfig)
+ .withFallback(defaultConfig);
+ return config;
+ }
+
+ private static void printFlags(Config flags) {
+ System.err.println("Joshua configuration options with default values:");
+ System.err.println(
+ flags.root().render(ConfigRenderOptions
+ .concise()
+ .setFormatted(true)
+ .setComments(true)));
+ }
- // ===============================================================
- // Main
- // ===============================================================
- public static void main(String[] args) throws IOException {
-
- // default log level
- LogManager.getRootLogger().setLevel(Level.INFO);
-
- JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
- ArgsParser userArgs = new ArgsParser(args,joshuaConfiguration);
-
- long startTime = System.currentTimeMillis();
-
- /* Step-0: some sanity checking */
- joshuaConfiguration.sanityCheck();
-
- /* Step-1: initialize the decoder, test-set independent */
- Decoder decoder = new Decoder(joshuaConfiguration);
-
- LOG.info("Model loading took {} seconds", (System.currentTimeMillis() - startTime) / 1000);
- LOG.info("Memory used {} MB", ((Runtime.getRuntime().totalMemory()
- - Runtime.getRuntime().freeMemory()) / 1000000.0));
-
- /* Step-2: Decoding */
+ private void run() throws IOException {
+
+ // set loglevel
+ LogManager.getRootLogger().setLevel(Level.toLevel(logLevel));
+
+ // load & compose flags
+ final Config config = getFlags();
+
+ if (help) {
+ printFlags(config);
+ return;
+ }
+
+ // initialize the Decoder
+ final long initStartTime = System.currentTimeMillis();
+ final Decoder decoder = new Decoder(config);
+ final float initTime = (System.currentTimeMillis() - initStartTime) / 1000.0f;
+ final float usedMemory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0f;
+ LOG.info("Model loading took {} seconds", initTime);
+ LOG.info("Memory used {} MB", usedMemory);
+
// create a server if requested, which will create TranslationRequest objects
- if (joshuaConfiguration.server_port > 0) {
- int port = joshuaConfiguration.server_port;
- if (joshuaConfiguration.server_type == SERVER_TYPE.TCP) {
- new TcpServer(decoder, port, joshuaConfiguration).start();
+ final Config serverConfig = config.getConfig("serverSettings");
+ if (serverConfig.getInt("server_port") > 0) {
+ final int port = serverConfig.getInt("server_port");
+ final ServerType serverType = ServerType.valueOf(serverConfig.getString("server_type"));
+ if (serverType == ServerType.TCP) {
+ new TcpServer(decoder, port).start();
- } else if (joshuaConfiguration.server_type == SERVER_TYPE.HTTP) {
- joshuaConfiguration.use_structured_output = true;
+ } else if (serverType == ServerType.HTTP) {
+ checkState(decoder.getDecoderConfig().getFlags().getBoolean("use_structured_output"));
HttpServer server = HttpServer.create(new InetSocketAddress(port), 0);
LOG.info("HTTP Server running and listening on port {}.", port);
- server.createContext("/", new ServerThread(null, decoder, joshuaConfiguration));
+ server.createContext("/", new ServerThread(null, decoder));
server.setExecutor(null); // creates a default executor
server.start();
} else {
@@ -95,18 +146,19 @@ public class JoshuaDecoder {
}
// Create a TranslationRequest object, reading from a file if requested, or from STDIN
- InputStream input = (joshuaConfiguration.input_file != null)
- ? new FileInputStream(joshuaConfiguration.input_file)
+ InputStream input = (!config.getString("input_file").isEmpty())
+ ? new FileInputStream(config.getString("input_file"))
: System.in;
-
+
BufferedReader reader = new BufferedReader(new InputStreamReader(input));
- TranslationRequestStream fileRequest = new TranslationRequestStream(reader, joshuaConfiguration);
+ TranslationRequestStream fileRequest = new TranslationRequestStream(reader, config);
TranslationResponseStream translationResponseStream = decoder.decodeAll(fileRequest);
// Create the n-best output stream
FileWriter nbest_out = null;
- if (joshuaConfiguration.n_best_file != null)
- nbest_out = new FileWriter(joshuaConfiguration.n_best_file);
+ if (!config.getString("n_best_file").isEmpty()) {
+ nbest_out = new FileWriter(config.getString("n_best_file"));
+ }
for (Translation translation: translationResponseStream) {
/**
@@ -115,14 +167,15 @@ public class JoshuaDecoder {
* Moses expects the simple translation on STDOUT and the n-best list in a file with a fixed
* format.
*/
- if (joshuaConfiguration.moses) {
+ if (config.getBoolean("moses")) {
String text = translation.toString().replaceAll("=", "= ");
// Write the complete formatted string to STDOUT
- if (joshuaConfiguration.n_best_file != null)
+ if (!config.getString("n_best_file").isEmpty()) {
nbest_out.write(text);
+ }
// Extract just the translation and output that to STDOUT
- text = text.substring(0, text.indexOf('\n'));
+ text = text.substring(0, text.indexOf('\n'));
String[] fields = text.split(" \\|\\|\\| ");
text = fields[1];
@@ -133,8 +186,9 @@ public class JoshuaDecoder {
}
}
- if (joshuaConfiguration.n_best_file != null)
+ if (!config.getString("n_best_file").isEmpty()) {
nbest_out.close();
+ }
LOG.info("Decoding completed.");
LOG.info("Memory used {} MB", ((Runtime.getRuntime().totalMemory()
@@ -142,6 +196,22 @@ public class JoshuaDecoder {
/* Step-3: clean up */
decoder.cleanUp();
- LOG.info("Total running time: {} seconds", (System.currentTimeMillis() - startTime) / 1000);
+ LOG.info("Total running time: {} seconds", (System.currentTimeMillis() - initStartTime) / 1000);
+ }
+
+ public static void main(String[] args) {
+ final JoshuaDecoder cli = new JoshuaDecoder();
+ final CmdLineParser parser = new CmdLineParser(cli);
+ try {
+ parser.parseArgument(args);
+ cli.run();
+ } catch (CmdLineException e) {
+ // handling of wrong arguments
+ LOG.error(e.getMessage());
+ parser.printUsage(System.err);
+ System.exit(1);
+ } catch (IOException e) {
+ Throwables.propagate(e);
+ }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/OOVItem.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/OOVItem.java b/joshua-core/src/main/java/org/apache/joshua/decoder/OOVItem.java
new file mode 100644
index 0000000..bed731c
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/OOVItem.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+/*
+ * A list of OOV symbols in the form
+ *
+ * [X1] weight [X2] weight [X3] weight ...
+ *
+ * where the [X] symbols are nonterminals, each followed by its weight. For each OOV word w in the
+ * input sentence, Joshua will create rules of the form
+ *
+ * X1 -> w (weight)
+ *
+ * If this is empty, an unweighted default_non_terminal is used.
+ */
+public class OOVItem implements Comparable<OOVItem> {
+ public final String label;
+
+ public final float weight;
+
+ OOVItem(String l, float w) {
+ label = l;
+ weight = w;
+ }
+ @Override
+ public int compareTo(OOVItem other) {
+ if (weight > other.weight)
+ return -1;
+ else if (weight < other.weight)
+ return 1;
+ return 0;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/SearchAlgorithm.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/SearchAlgorithm.java b/joshua-core/src/main/java/org/apache/joshua/decoder/SearchAlgorithm.java
new file mode 100644
index 0000000..c0abafc
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/SearchAlgorithm.java
@@ -0,0 +1,7 @@
+package org.apache.joshua.decoder;
+
+public enum SearchAlgorithm {
+
+ cky, stack;
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ServerType.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ServerType.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ServerType.java
new file mode 100644
index 0000000..2488f79
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ServerType.java
@@ -0,0 +1,4 @@
+package org.apache.joshua.decoder;
+
+/* Type of server. Not sure we need to keep the regular TCP one around. */
+public enum ServerType { none, TCP, HTTP }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/joshua-core/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 9f32d31..454e0bb 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -128,8 +128,7 @@ public class StructuredTranslation {
private String maybeProjectCase(String hypothesis) {
String output = hypothesis;
- JoshuaConfiguration config = sourceSentence.config;
- if (config.project_case) {
+ if (sourceSentence.getConfig().getBoolean("project_case")) {
String[] tokens = hypothesis.split("\\s+");
List<List<Integer>> points = getTranslationWordAlignments();
for (int i = 0; i < points.size(); i++) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/Translation.java b/joshua-core/src/main/java/org/apache/joshua/decoder/Translation.java
index dfe839a..4ccf79c 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -31,9 +31,7 @@ import java.io.StringWriter;
import java.util.Collections;
import java.util.List;
-import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.lm.StateMinimizingLanguageModel;
import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.hypergraph.KBestExtractor;
import org.apache.joshua.decoder.io.DeNormalize;
@@ -67,21 +65,21 @@ public class Translation {
*/
private List<StructuredTranslation> structuredTranslations = null;
- public Translation(Sentence source, HyperGraph hypergraph,
- List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
+ public Translation(final Sentence source,
+ final HyperGraph hypergraph, DecoderConfig config) {
this.source = source;
/**
* Structured output from Joshua provides a way to programmatically access translation results
* from downstream applications, instead of writing results as strings to an output buffer.
*/
- if (joshuaConfiguration.use_structured_output) {
+ if (config.getFlags().getBoolean("use_structured_output")) {
- if (joshuaConfiguration.topN == 0) {
+ if (config.getFlags().getInt("top_n") == 0) {
/*
* Obtain Viterbi StructuredTranslation
*/
- StructuredTranslation translation = fromViterbiDerivation(source, hypergraph, featureFunctions);
+ StructuredTranslation translation = fromViterbiDerivation(source, hypergraph, config.getFeatureFunctions());
this.output = translation.getTranslationString();
structuredTranslations = Collections.singletonList(translation);
@@ -89,8 +87,8 @@ public class Translation {
/*
* Get K-Best list of StructuredTranslations
*/
- final KBestExtractor kBestExtractor = new KBestExtractor(source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
- structuredTranslations = kBestExtractor.KbestExtractOnHG(hypergraph, joshuaConfiguration.topN);
+ final KBestExtractor kBestExtractor = new KBestExtractor(source, config, false);
+ structuredTranslations = kBestExtractor.KbestExtractOnHG(hypergraph, config.getFlags().getInt("top_n"));
if (structuredTranslations.isEmpty()) {
structuredTranslations = Collections
.singletonList(StructuredTranslationFactory.fromEmptyOutput(source));
@@ -112,7 +110,7 @@ public class Translation {
long startTime = System.currentTimeMillis();
- if (joshuaConfiguration.topN == 0) {
+ if (config.getFlags().getInt("top_n") == 0) {
/* construct Viterbi output */
final String best = getViterbiString(hypergraph);
@@ -124,18 +122,19 @@ public class Translation {
* the output-string, with the understanding that we can only substitute variables for the
* output string, sentence number, and model score.
*/
- String translation = joshuaConfiguration.outputFormat
+ String outputFormat = config.getFlags().getString("output_format");
+ String translation = outputFormat
.replace("%s", removeSentenceMarkers(best))
.replace("%S", DeNormalize.processSingleLine(best))
.replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
.replace("%i", String.format("%d", source.id()));
- if (joshuaConfiguration.outputFormat.contains("%a")) {
+ if (outputFormat.contains("%a")) {
translation = translation.replace("%a", getViterbiWordAlignments(hypergraph));
}
- if (joshuaConfiguration.outputFormat.contains("%f")) {
- final FeatureVector features = getViterbiFeatures(hypergraph, featureFunctions, source);
+ if (outputFormat.contains("%f")) {
+ final FeatureVector features = getViterbiFeatures(hypergraph, config.getFeatureFunctions(), source);
translation = translation.replace("%f", features.textFormat());
}
@@ -145,27 +144,28 @@ public class Translation {
} else {
final KBestExtractor kBestExtractor = new KBestExtractor(
- source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
- kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
+ source, config, false);
+ kBestExtractor.lazyKBestExtractOnHG(hypergraph, config.getFlags().getInt("top_n"), out);
- if (joshuaConfiguration.rescoreForest) {
+ if (config.getFlags().getBoolean("rescore_forest")) {
final int bleuFeatureHash = hashFeature("BLEU");
- Decoder.weights.add(bleuFeatureHash, joshuaConfiguration.rescoreForestWeight);
- kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
+ // TODO(fhieber): this is fishy, why would we want to change the decoder weights HERE?
+ config.getWeights().add(bleuFeatureHash, (float) config.getFlags().getDouble("rescore_forest_weight"));
+ kBestExtractor.lazyKBestExtractOnHG(hypergraph, config.getFlags().getInt("top_n"), out);
- Decoder.weights.add(bleuFeatureHash, -joshuaConfiguration.rescoreForestWeight);
- kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
+ config.getWeights().add(bleuFeatureHash, -(float) config.getFlags().getDouble("rescore_forest_weight"));
+ kBestExtractor.lazyKBestExtractOnHG(hypergraph, config.getFlags().getInt("top_n"), out);
}
}
float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
LOG.info("Input {}: {}-best extraction took {} seconds", id(),
- joshuaConfiguration.topN, seconds);
+ config.getFlags().getInt("top_n"), seconds);
} else {
// Failed translations and blank lines get empty formatted outputs
- out.write(getFailedTranslationOutput(source, joshuaConfiguration));
+ out.write(getFailedTranslationOutput(source, config.getFlags().getString("output_format")));
out.newLine();
}
@@ -198,8 +198,8 @@ public class Translation {
return output;
}
- private String getFailedTranslationOutput(final Sentence source, final JoshuaConfiguration joshuaConfiguration) {
- return joshuaConfiguration.outputFormat
+ private String getFailedTranslationOutput(final Sentence source, final String outputFormat) {
+ return outputFormat
.replace("%s", source.source())
.replace("%e", "")
.replace("%S", "")
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/TranslationRequest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/TranslationRequest.java b/joshua-core/src/main/java/org/apache/joshua/decoder/TranslationRequest.java
new file mode 100644
index 0000000..73de3c4
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/TranslationRequest.java
@@ -0,0 +1,24 @@
+package org.apache.joshua.decoder;
+
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+public class TranslationRequest {
+
+ private final Sentence sentence;
+ private final DecoderConfig decoderConfig;
+
+ public TranslationRequest(final Sentence sentence, final DecoderConfig decoderConfig) {
+ this.sentence = sentence;
+ this.decoderConfig = decoderConfig;
+ }
+
+ public Sentence getSentence() {
+ return sentence;
+ }
+
+ public DecoderConfig getDecoderConfig() {
+ return decoderConfig;
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index a1c3093..7bfd830 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -18,6 +18,8 @@
*/
package org.apache.joshua.decoder.chart_parser;
+import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
@@ -25,16 +27,12 @@ import java.util.List;
import java.util.PriorityQueue;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.DecoderConfig;
import org.apache.joshua.decoder.chart_parser.DotChart.DotNode;
-import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.ff.SourceDependentFF;
-import org.apache.joshua.decoder.ff.tm.AbstractGrammar;
import org.apache.joshua.decoder.ff.tm.Grammar;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.ff.tm.RuleCollection;
import org.apache.joshua.decoder.ff.tm.Trie;
-import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.segment_file.Sentence;
@@ -46,7 +44,7 @@ import org.apache.joshua.util.ChartSpan;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNodeResult;
+import com.google.common.collect.ImmutableList;
/**
* Chart class this class implements chart-parsing: (1) seeding the chart (2)
@@ -67,11 +65,12 @@ import static org.apache.joshua.decoder.chart_parser.ComputeNodeResult.computeNo
public class Chart {
private static final Logger LOG = LoggerFactory.getLogger(Chart.class);
- private final JoshuaConfiguration config;
- // ===========================================================
- // Statistics
- // ===========================================================
-
+
+ private final DecoderConfig config;
+
+ private final int numTranslationOptions;
+ private final int popLimit;
+
/**
* how many items have been pruned away because their cost is greater than the
* cutoff in calling chart.add_deduction_in_chart()
@@ -79,32 +78,17 @@ public class Chart {
int nMerged = 0;
int nAdded = 0;
int nDotitemAdded = 0; // note: there is no pruning in dot-item
-
- public Sentence getSentence() {
- return this.sentence;
- }
- // ===============================================================
- // Private instance fields (maybe could be protected instead)
- // ===============================================================
private final ChartSpan<Cell> cells; // note that in some cell, it might be null
private final int sourceLength;
- private final List<FeatureFunction> featureFunctions;
- private final Grammar[] grammars;
private final DotChart[] dotcharts; // each grammar should have a dotchart associated with it
private Cell goalBin;
private int goalSymbolID = -1;
private final Lattice<Token> inputLattice;
- private Sentence sentence = null;
-// private SyntaxTree parseTree;
+ private final Sentence sentence;
private StateConstraint stateConstraint;
-
- // ===============================================================
- // Constructors
- // ===============================================================
-
/*
* TODO: Once the Segment interface is adjusted to provide a Lattice<String>
* for the sentence() method, we should just accept a Segment instead of the
@@ -116,52 +100,32 @@ public class Chart {
* grammars too so we could move all of that into here.
*/
- public Chart(Sentence sentence, List<FeatureFunction> featureFunctions, Grammar[] grammars,
- String goalSymbol, JoshuaConfiguration config) {
+ public Chart(final Sentence sentence, final DecoderConfig config) {
+
this.config = config;
+ this.numTranslationOptions = config.getFlags().getInt("num_translation_options");
+ this.popLimit = config.getFlags().getInt("pop_limit");
+
this.inputLattice = sentence.getLattice();
this.sourceLength = inputLattice.size() - 1;
- this.featureFunctions = featureFunctions;
-
this.sentence = sentence;
-
- // TODO: OOV handling no longer handles parse tree input (removed after
- // commit 748eb69714b26dd67cba8e7c25a294347603bede)
-// this.parseTree = null;
-// if (sentence instanceof ParsedSentence)
-// this.parseTree = ((ParsedSentence) sentence).syntaxTree();
-//
this.cells = new ChartSpan<>(sourceLength, null);
-
- this.goalSymbolID = Vocabulary.id(goalSymbol);
+ this.goalSymbolID = Vocabulary.id(config.getFlags().getString("goal_symbol"));
this.goalBin = new Cell(this, this.goalSymbolID);
- /* Create the grammars, leaving space for the OOV grammar. */
- this.grammars = new Grammar[grammars.length + 1];
- System.arraycopy(grammars, 0, this.grammars, 1, grammars.length);
-
- MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config, 20);
- AbstractGrammar.addOOVRules(oovGrammar, sentence.getLattice(), featureFunctions,
- this.config.true_oovs_only);
- this.grammars[0] = oovGrammar;
-
// each grammar will have a dot chart
- this.dotcharts = new DotChart[this.grammars.length];
- for (int i = 0; i < this.grammars.length; i++)
- this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this);
+ final int numGrammars = config.getGrammars().size();
+ this.dotcharts = new DotChart[numGrammars];
+ for (int i = 0; i < numGrammars; i++) {
+ this.dotcharts[i] = new DotChart(this.inputLattice, config.getGrammars().get(i), this);
+ }
// Begin to do initialization work
-
stateConstraint = null;
- if (sentence.target() != null)
- // stateConstraint = new StateConstraint(sentence.target());
+ if (sentence.target() != null) {
stateConstraint = new StateConstraint(Vocabulary.START_SYM + " " + sentence.target() + " "
+ Vocabulary.STOP_SYM);
-
- /* Find the SourceDependent feature and give it access to the sentence. */
- this.featureFunctions.stream().filter(ff -> ff instanceof SourceDependentFF)
- .forEach(ff -> ((SourceDependentFF) ff).setSource(sentence));
-
+ }
LOG.debug("Finished seeding chart.");
}
@@ -180,6 +144,10 @@ public class Chart {
this.goalSymbolID = i;
this.goalBin = new Cell(this, i);
}
+
+ public Sentence getSentence() {
+ return this.sentence;
+ }
// ===============================================================
// The primary method for filling in the chart
@@ -209,8 +177,8 @@ public class Chart {
* Look at all the grammars, seeding the chart with completed rules from the
* DotChart
*/
- for (int g = 0; g < grammars.length; g++) {
- if (!grammars[g].hasRuleForSpan(i, j, inputLattice.distance(i, j))
+ for (int g = 0; g < config.getGrammars().size(); g++) {
+ if (!config.getGrammars().get(g).hasRuleForSpan(i, j, inputLattice.distance(i, j))
|| null == dotcharts[g].getDotCell(i, j))
continue;
@@ -220,7 +188,7 @@ public class Chart {
if (ruleCollection == null)
continue;
- List<Rule> rules = ruleCollection.getSortedRules(this.featureFunctions);
+ List<Rule> rules = ruleCollection.getSortedRules(config.getFeatureFunctions());
SourcePath sourcePath = dotNode.getSourcePath();
if (null == rules || rules.size() == 0)
@@ -238,12 +206,11 @@ public class Chart {
/* Terminal productions are added directly to the chart */
for (Rule rule : rules) {
- if (config.num_translation_options > 0
- && numTranslationsAdded >= config.num_translation_options) {
+ if (numTranslationOptions > 0 && numTranslationsAdded >= numTranslationOptions) {
break;
}
- NodeResult result = computeNodeResult(this.featureFunctions, rule, null, i,
+ NodeResult result = computeNodeResult(config, rule, null, i,
j, sourcePath, this.sentence);
if (stateConstraint == null || stateConstraint.isLegal(result.getDPStates())) {
@@ -272,7 +239,7 @@ public class Chart {
int[] ranks = new int[1 + superNodes.size()];
Arrays.fill(ranks, 1);
- NodeResult result = computeNodeResult(featureFunctions, bestRule,
+ NodeResult result = computeNodeResult(config, bestRule,
currentTailNodes, i, j, sourcePath, sentence);
CubePruneState bestState = new CubePruneState(result, ranks, rules, currentTailNodes,
dotNode);
@@ -306,7 +273,6 @@ public class Chart {
*/
HashSet<CubePruneState> visitedStates = new HashSet<>();
- int popLimit = config.pop_limit;
int popCount = 0;
while (candidates.size() > 0 && ((++popCount <= popLimit) || popLimit == 0)) {
CubePruneState state = candidates.poll();
@@ -343,7 +309,7 @@ public class Chart {
* nodes)
*/
if (k == 0
- && (nextRanks[k] > rules.size() || (config.num_translation_options > 0 && nextRanks[k] > config.num_translation_options)))
+ && (nextRanks[k] > rules.size() || (numTranslationOptions > 0 && nextRanks[k] > numTranslationOptions)))
continue;
else if ((k != 0 && nextRanks[k] > superNodes.get(k - 1).nodes.size()))
continue;
@@ -356,7 +322,7 @@ public class Chart {
nextAntNodes.add(superNodes.get(x).nodes.get(nextRanks[x + 1] - 1));
/* Create the next state. */
- CubePruneState nextState = new CubePruneState(computeNodeResult(featureFunctions,
+ CubePruneState nextState = new CubePruneState(computeNodeResult(config,
nextRule, nextAntNodes, i, j, sourcePath, this.sentence), nextRanks, rules,
nextAntNodes, dotNode);
@@ -394,7 +360,7 @@ public class Chart {
if (!sentence.hasPath(i, j))
continue;
- for (Grammar grammar : this.grammars) {
+ for (Grammar grammar : config.getGrammars()) {
// System.err.println(String.format("\n*** I=%d J=%d GRAMMAR=%d", i, j, g));
if (j == i + 1) {
@@ -418,13 +384,13 @@ public class Chart {
applyCubePruning(i, j, allCandidates[j - i]);
// Add unary nodes
- addUnaryNodes(this.grammars, i, j);
+ addUnaryNodes(config.getGrammars(), i, j);
}
}
// transition_final: setup a goal item, which may have many deductions
if (null == this.cells.get(0, sourceLength)
- || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
+ || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), config.getFeatureFunctions(),
this.sourceLength)) {
LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
sentence.id());
@@ -529,7 +495,7 @@ public class Chart {
// TODO: one entry per rule, or per rule instantiation (rule together with
// unique matching of input)?
- List<Rule> rules = dotNode.getRuleCollection().getSortedRules(featureFunctions);
+ List<Rule> rules = dotNode.getRuleCollection().getSortedRules(config.getFeatureFunctions());
Rule bestRule = rules.get(0);
List<SuperNode> superNodes = dotNode.getAntSuperNodes();
@@ -540,7 +506,7 @@ public class Chart {
int[] ranks = new int[1 + superNodes.size()];
Arrays.fill(ranks, 1);
- NodeResult result = computeNodeResult(featureFunctions, bestRule, tailNodes,
+ NodeResult result = computeNodeResult(config, bestRule, tailNodes,
dotNode.begin(), dotNode.end(), dotNode.getSourcePath(), sentence);
CubePruneState seedState = new CubePruneState(result, ranks, rules, tailNodes, dotNode);
@@ -572,13 +538,13 @@ public class Chart {
*/
if (LOG.isDebugEnabled())
LOG.debug("Expanding cell");
- for (int k = 0; k < this.grammars.length; k++) {
+ for (int k = 0; k < config.getGrammars().size(); k++) {
/**
* Each dotChart can act individually (without consulting other
* dotCharts) because it either consumes the source input or the
* complete nonTerminals, which are both grammar-independent.
**/
- this.dotcharts[k].expandDotCell(i, j);
+ dotcharts[k].expandDotCell(i, j);
}
/*
@@ -592,16 +558,16 @@ public class Chart {
/* 3. Process unary rules. */
if (LOG.isDebugEnabled())
LOG.debug("Adding unary items into chart");
- addUnaryNodes(this.grammars, i, j);
+ addUnaryNodes(config.getGrammars(), i, j);
// (4)=== in dot_cell(i,j), add dot-nodes that start from the /complete/
// superIterms in
// chart_cell(i,j)
if (LOG.isDebugEnabled())
LOG.debug("Initializing new dot-items that start from complete items in this cell");
- for (int k = 0; k < this.grammars.length; k++) {
- if (this.grammars[k].hasRuleForSpan(i, j, inputLattice.distance(i, j))) {
- this.dotcharts[k].startDotItems(i, j);
+ for (int k = 0; k < config.getGrammars().size(); k++) {
+ if (config.getGrammars().get(k).hasRuleForSpan(i, j, inputLattice.distance(i, j))) {
+ dotcharts[k].startDotItems(i, j);
}
}
@@ -621,7 +587,7 @@ public class Chart {
// transition_final: setup a goal item, which may have many deductions
if (null == this.cells.get(0, sourceLength)
- || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
+ || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), config.getFeatureFunctions(),
this.sourceLength)) {
LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
sentence.id());
@@ -671,7 +637,7 @@ public class Chart {
* @param j
* @return the number of nodes added
*/
- private int addUnaryNodes(Grammar[] grammars, int i, int j) {
+ private int addUnaryNodes(final ImmutableList<Grammar> grammars, int i, int j) {
Cell chartBin = this.cells.get(i, j);
if (null == chartBin) {
@@ -703,10 +669,10 @@ public class Chart {
ArrayList<HGNode> antecedents = new ArrayList<>();
antecedents.add(node);
- List<Rule> rules = childNode.getRuleCollection().getSortedRules(this.featureFunctions);
+ List<Rule> rules = childNode.getRuleCollection().getSortedRules(config.getFeatureFunctions());
for (Rule rule : rules) { // for each unary rules
- NodeResult states = computeNodeResult(this.featureFunctions, rule,
+ NodeResult states = computeNodeResult(this.config, rule,
antecedents, i, j, new SourcePath(), this.sentence);
HGNode resNode = chartBin.addHyperEdgeInCell(states, rule, i, j, antecedents,
new SourcePath(), true);
@@ -738,7 +704,7 @@ public class Chart {
}
this.cells.get(i, j).addHyperEdgeInCell(
- computeNodeResult(this.featureFunctions, rule, null, i, j, srcPath, sentence), rule, i,
+ computeNodeResult(config, rule, null, i, j, srcPath, sentence), rule, i,
j, null, srcPath, false);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index cfaf96a..53ec535 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -19,9 +19,10 @@
package org.apache.joshua.decoder.chart_parser;
-import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.DecoderConfig;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.ScoreAccumulator;
import org.apache.joshua.decoder.ff.StatefulFF;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
@@ -55,7 +56,7 @@ public class ComputeNodeResult {
* @param sourcePath information about a path taken through the source lattice
* @param sentence the lattice input
*/
- public static NodeResult computeNodeResult(List<FeatureFunction> featureFunctions, Rule rule, List<HGNode> tailNodes,
+ public static NodeResult computeNodeResult(DecoderConfig config, Rule rule, List<HGNode> tailNodes,
int i, int j, SourcePath sourcePath, Sentence sentence) {
// The total Viterbi cost of this edge. This is the Viterbi cost of the tail nodes, plus
@@ -91,26 +92,27 @@ public class ComputeNodeResult {
// The future cost estimate is a heuristic estimate of the outside cost of this edge.
float futureCostEstimate = 0.0f;
- /*
- * We now iterate over all the feature functions, computing their cost and their expected future
- * cost.
- */
- for (FeatureFunction feature : featureFunctions) {
- FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator();
+ /*
+ * We now iterate over all the feature functions, computing their cost and their expected future
+ * cost.
+ */
+ for (FeatureFunction feature : config.getFeatureFunctions()) {
+ ScoreAccumulator acc = new ScoreAccumulator(config.getWeights());
DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
transitionCost += acc.getScore();
if (LOG.isDebugEnabled()) {
LOG.debug("FEATURE {} = {} * {} = {}", feature.getName(),
- acc.getScore() / Decoder.weights.getOrDefault(hashFeature(feature.getName())),
- Decoder.weights.getOrDefault(hashFeature(feature.getName())), acc.getScore());
+ acc.getScore() / config.getWeights().getOrDefault(hashFeature(feature.getName())),
+ config.getWeights().getOrDefault(hashFeature(feature.getName())), acc.getScore());
}
if (feature.isStateful()) {
futureCostEstimate += feature.estimateFutureCost(rule, newState, sentence);
allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
}
+
}
viterbiCost += transitionCost;
if (LOG.isDebugEnabled())
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/Accumulator.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/Accumulator.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/Accumulator.java
new file mode 100644
index 0000000..d80b850
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/Accumulator.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+/**
+ * Accumulator objects allow us to generalize feature computation.
+ * ScoreAccumulator takes (feature,value) pairs and simply stores the weighted
+ * sum (for decoding). FeatureAccumulator records the named feature values
+ * (for k-best extraction).
+ */
+public interface Accumulator {
+ public void add(int featureId, float value);
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
index 996f40d..5fe2c0d 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.OwnerId;
@@ -29,6 +28,8 @@ import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* This feature function counts rules from a particular grammar (identified by the owner) having an
* arity within a specific range. It expects three parameters upon initialization: the owner, the
@@ -44,11 +45,11 @@ public class ArityPhrasePenalty extends StatelessFF {
private final int minArity;
private final int maxArity;
- public ArityPhrasePenalty(final FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "ArityPenalty", args, config);
- this.owner = OwnerMap.register(parsedArgs.get("owner"));
- this.minArity = Integer.parseInt(parsedArgs.get("min-arity"));
- this.maxArity = Integer.parseInt(parsedArgs.get("max-arity"));
+ public ArityPhrasePenalty(Config featureConfig, FeatureVector weights) {
+ super("ArityPenalty", featureConfig, weights);
+ this.owner = OwnerMap.register(this.featureConfig.getString("owner"));
+ this.minArity = this.featureConfig.getInt("min_arity");
+ this.maxArity = this.featureConfig.getInt("max_arity");
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureAccumulator.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureAccumulator.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureAccumulator.java
new file mode 100644
index 0000000..b05c588
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureAccumulator.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+
+public class FeatureAccumulator implements Accumulator {
+ private final FeatureVector features;
+
+ public FeatureAccumulator() {
+ this.features = new FeatureVector(10);
+ }
+
+ @Override
+ public void add(int id, float value) {
+ features.add(id, value);
+ }
+
+ public FeatureVector getFeatures() {
+ return features;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index 802aadd..0596cc2 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -20,18 +20,16 @@ package org.apache.joshua.decoder.ff;
import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
-import java.util.HashMap;
import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* <p>This class defines Joshua's feature function interface, for both sparse and
* dense features. It is immediately inherited by StatelessFF and StatefulFF,
@@ -81,19 +79,12 @@ public abstract class FeatureFunction {
*/
protected int featureId;
- // The list of arguments passed to the feature, and the hash for the parsed args
- protected final String[] args;
- protected final HashMap<String, String> parsedArgs;
-
- /*
- * The global weight vector used by the decoder, passed it when the feature is
- * instantiated
- */
+ // The configuration passed to the feature
+ protected final Config featureConfig;
+
+ // reference to the global decoder weights
protected final FeatureVector weights;
- /* The config */
- protected final JoshuaConfiguration config;
-
public String getName() {
return name;
}
@@ -101,23 +92,21 @@ public abstract class FeatureFunction {
// Whether the feature has state.
public abstract boolean isStateful();
- public FeatureFunction(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
- this.weights = weights;
+ public FeatureFunction(final String name, final Config featureConfig, final FeatureVector weights) {
this.name = name;
- this.featureId = FeatureMap.hashFeature(this.name);
- this.args = args;
- this.config = config;
- this.parsedArgs = FeatureFunction.parseArgs(args);
+ this.featureId = hashFeature(this.name); // TODO(fhieber) proper hashing here
+ this.featureConfig = featureConfig;
+ this.weights = weights;
}
public String logString() {
- return String.format("%s (weight %.3f)", name, weights.getOrDefault(hashFeature(name)));
+ return String.format("%s (weight %.3f)", name, weights.getOrDefault(featureId));
}
/**
* This is the main function for defining feature values. The implementor
* should compute all the features along the hyperedge, calling
- * {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator#add(String, float)}
+ * {@link org.apache.joshua.decoder.ff.Accumulator#add(int, float)}
* for each feature. It then returns the newly-computed dynamic
* programming state for this feature (for example, for the
* {@link org.apache.joshua.decoder.ff.lm.LanguageModelFF} feature, this returns the new language model
@@ -132,14 +121,14 @@ public abstract class FeatureFunction {
* @param j todo
* @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
* @param sentence {@link org.apache.joshua.lattice.Lattice} input
- * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+ * @param acc {@link org.apache.joshua.decoder.ff.Accumulator} object permitting generalization of feature computation
* @return the new dynamic programming state (null for stateless features)
*/
public abstract DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j,
SourcePath sourcePath, Sentence sentence, Accumulator acc);
/**
- * Feature functions must overrided this. StatefulFF and StatelessFF provide
+ * Feature functions must override this. StatefulFF and StatelessFF provide
* reasonable defaults since most features do not fire on the goal node.
*
* @param tailNode single {@link org.apache.joshua.decoder.hypergraph.HGNode} representing tail node
@@ -147,7 +136,7 @@ public abstract class FeatureFunction {
* @param j todo
* @param sourcePath information about a path taken through the source {@link org.apache.joshua.lattice.Lattice}
* @param sentence {@link org.apache.joshua.lattice.Lattice} input
- * @param acc {@link org.apache.joshua.decoder.ff.FeatureFunction.Accumulator} object permitting generalization of feature computation
+ * @param acc {@link org.apache.joshua.decoder.ff.Accumulator} object permitting generalization of feature computation
* @return the DPState (null if none)
*/
public abstract DPState computeFinal(HGNode tailNode, int i, int j, SourcePath sourcePath,
@@ -195,7 +184,7 @@ public abstract class FeatureFunction {
public final float computeFinalCost(HGNode tailNode, int i, int j, SourcePath sourcePath,
Sentence sentence) {
- ScoreAccumulator score = new ScoreAccumulator();
+ final ScoreAccumulator score = new ScoreAccumulator(weights);
computeFinal(tailNode, i, j, sourcePath, sentence, score);
return score.getScore();
}
@@ -248,92 +237,4 @@ public abstract class FeatureFunction {
* context.
*/
public abstract float estimateFutureCost(Rule rule, DPState state, Sentence sentence);
-
- /**
- * Parses the arguments passed to a feature function in the Joshua config file TODO: Replace this
- * with a proper CLI library at some point Expects key value pairs in the form : -argname value
- * Any key without a value is added with an empty string as value Multiple values for the same key
- * are not parsed. The first one is used.
- *
- * @param args A string with the raw arguments and their names
- * @return A hash with the keys and the values of the string
- */
- public static HashMap<String, String> parseArgs(String[] args) {
- HashMap<String, String> parsedArgs = new HashMap<>();
- boolean lookingForValue = false;
- String currentKey = null;
- for (String arg : args) {
-
- Pattern argKeyPattern = Pattern.compile("^-[a-zA-Z]\\S+");
- Matcher argKey = argKeyPattern.matcher(arg);
- if (argKey.find()) {
- // This is a key
- // First check to see if there is a key that is waiting to be written
- if (lookingForValue) {
- // This is a key with no specified value
- parsedArgs.put(currentKey, "");
- }
- // Now store the new key and look for its value
- currentKey = arg.substring(1);
- lookingForValue = true;
- } else {
- // This is a value
- if (lookingForValue) {
- parsedArgs.put(currentKey, arg);
- lookingForValue = false;
- }
- }
- }
-
- // make sure we add the last key without value
- if (lookingForValue && currentKey != null) {
- // end of line, no value
- parsedArgs.put(currentKey, "");
- }
- return parsedArgs;
- }
-
- /**
- * Accumulator objects allow us to generalize feature computation.
- * ScoreAccumulator takes (feature,value) pairs and simple stores the weighted
- * sum (for decoding). FeatureAccumulator records the named feature values
- * (for k-best extraction).
- */
- public interface Accumulator {
- public void add(int featureId, float value);
- }
-
- public class ScoreAccumulator implements Accumulator {
- private float score;
-
- public ScoreAccumulator() {
- this.score = 0.0f;
- }
-
- @Override
- public void add(int featureId, float value) {
- score += value * weights.getOrDefault(featureId);
- }
-
- public float getScore() {
- return score;
- }
- }
-
- public class FeatureAccumulator implements Accumulator {
- private final FeatureVector features;
-
- public FeatureAccumulator() {
- this.features = new FeatureVector(10);
- }
-
- @Override
- public void add(int id, float value) {
- features.add(id, value);
- }
-
- public FeatureVector getFeatures() {
- return features;
- }
- }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
index edeae6c..542a3f8 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
@@ -22,19 +22,20 @@ package org.apache.joshua.decoder.ff;
* @author Gideon Wenniger
*/
-import java.util.List;
+import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
public class LabelCombinationFF extends StatelessFF {
- public LabelCombinationFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "LabelCombination", args, config);
+ public LabelCombinationFF(Config featureConfig, FeatureVector weights) {
+ super("LabelCombination", featureConfig, weights);
}
public String getLowerCasedFeatureName() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
index 9be3f88..96a8a07 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
@@ -26,7 +26,6 @@ import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
@@ -34,12 +33,14 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.ListUtil;
+import com.typesafe.config.Config;
+
public class LabelSubstitutionFF extends StatelessFF {
private static final String MATCH_SUFFIX = "MATCH";
private static final String NO_MATCH_SUFFIX = "NOMATCH";
- public LabelSubstitutionFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "LabelSubstitution", args, config);
+ public LabelSubstitutionFF(Config featureConfig, FeatureVector weights) {
+ super("LabelSubstitution", featureConfig, weights);
}
public String getLowerCasedFeatureName() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
index 63d350e..eeb71c7 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@ -25,7 +25,6 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.OwnerId;
@@ -36,6 +35,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.FormatUtils;
import com.google.common.cache.Cache;
+import com.typesafe.config.Config;
/**
* Lexical alignment features denoting alignments, deletions, and insertions.
@@ -58,22 +58,18 @@ public class LexicalFeatures extends StatelessFF {
private final Cache<Rule, List<Integer>> featureCache;
- public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, NAME, args, config);
+ public LexicalFeatures(Config featureConfig, FeatureVector weights) {
+ super(NAME, featureConfig, weights);
- ownerRestriction = (parsedArgs.containsKey("owner"));
- owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : OwnerMap.UNKNOWN_OWNER_ID;
+ ownerRestriction = featureConfig.hasPath("owner");
+ owner = ownerRestriction ? OwnerMap.register(featureConfig.getString("owner")) : OwnerMap.UNKNOWN_OWNER_ID;
- useAlignments = parsedArgs.containsKey("alignments");
- useDeletions = parsedArgs.containsKey("deletions");
- useInsertions = parsedArgs.containsKey("insertions");
+ useAlignments = featureConfig.hasPath("alignments");
+ useDeletions = featureConfig.hasPath("deletions");
+ useInsertions = featureConfig.hasPath("insertions");
// initialize cache
- if (parsedArgs.containsKey("cacheSize")) {
- featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
- } else {
- featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
- }
+ featureCache = newBuilder().maximumSize(featureConfig.getInt("cache_size")).build();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 5e99428..f6e5512 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -18,12 +18,10 @@
*/
package org.apache.joshua.decoder.ff;
-import java.util.HashMap;
+import static org.apache.joshua.util.Constants.OOV_OWNER;
+
import java.util.List;
-import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.OwnerId;
@@ -31,6 +29,9 @@ import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.util.Constants;
+
+import com.typesafe.config.Config;
/**
* This feature is fired when an out-of-vocabulary word (with respect to the translation model) is
@@ -44,19 +45,12 @@ import org.apache.joshua.decoder.segment_file.Sentence;
*/
public class OOVPenalty extends StatelessFF {
private final OwnerId ownerID;
+ private static final String NAME = "OOVPenalty";
+ private static final float DEFAULT_VALUE = -100f;
- private final HashMap<Integer,Float> oovWeights;
-
- public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "OOVPenalty", args, config);
- ownerID = OwnerMap.register("oov");
- oovWeights = new HashMap<>();
-
- if (config.oovList != null) {
- for (OOVItem item: config.oovList) {
- oovWeights.put(Vocabulary.id(item.label), item.weight);
- }
- }
+ public OOVPenalty(Config featureConfig, FeatureVector weights) {
+ super(NAME, featureConfig, weights);
+ ownerID = OwnerMap.register(OOV_OWNER);
}
/**
@@ -69,7 +63,7 @@ public class OOVPenalty extends StatelessFF {
Sentence sentence, Accumulator acc) {
if (rule != null && this.ownerID.equals(rule.getOwner())) {
- acc.add(featureId, getValue(rule.getLHS()));
+ acc.add(featureId, DEFAULT_VALUE);
}
return null;
@@ -85,13 +79,8 @@ public class OOVPenalty extends StatelessFF {
@Override
public float estimateCost(Rule rule, Sentence sentence) {
if (rule != null && this.ownerID.equals(rule.getOwner())) {
- return weights.getOrDefault(featureId) * getValue(rule.getLHS());
+ return weights.getOrDefault(featureId) * DEFAULT_VALUE;
}
return 0.0f;
}
-
- private float getValue(int lhs) {
- float defaultValue = -100f;
- return oovWeights.containsKey(lhs) ? oovWeights.get(lhs) : defaultValue;
- }
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index e1ba328..81df9a1 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -21,16 +21,16 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
import java.util.Map.Entry;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
-import org.apache.joshua.decoder.ff.tm.Grammar;
import org.apache.joshua.decoder.ff.tm.OwnerId;
import org.apache.joshua.decoder.ff.tm.OwnerMap;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* This feature handles the list of features that are stored with grammar rules in the grammar file.
* These are by convention bound to the PhraseModel feature function and will be prepended by the owner of this
@@ -45,10 +45,10 @@ public class PhraseModel extends StatelessFF {
private final OwnerId owner;
- public PhraseModel(FeatureVector weights, String[] args, JoshuaConfiguration config, Grammar g) {
+ public PhraseModel(OwnerId owner, Config featureConfig, FeatureVector weights) {
// name of this feature is the owner of the grammar
- super(weights, OwnerMap.getOwner(g.getOwner()), args, config);
- this.owner = g.getOwner();
+ super(OwnerMap.getOwner(owner), featureConfig, weights);
+ this.owner = owner;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 4f6a61c..f040660 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.OwnerId;
@@ -30,6 +29,8 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.phrase.Hypothesis;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* This feature just counts rules that are used. You can restrict it with a number of flags:
*
@@ -45,10 +46,10 @@ public class PhrasePenalty extends StatelessFF {
private final OwnerId owner;
private final float value = 1.0f;
- public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "PhrasePenalty", args, config);
- if (parsedArgs.containsKey("owner"))
- this.owner = OwnerMap.register(parsedArgs.get("owner"));
+ public PhrasePenalty(Config featureConfig, FeatureVector weights) {
+ super("PhrasePenalty", featureConfig, weights);
+ if (featureConfig.hasPath("owner"))
+ this.owner = OwnerMap.register(featureConfig.getString("owner"));
else // default
this.owner = OwnerMap.register("pt");
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
index 7a08043..89b777a 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@ -24,7 +24,6 @@ import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER_ID;
import java.util.List;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.OwnerId;
@@ -34,6 +33,7 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
import com.google.common.cache.Cache;
+import com.typesafe.config.Config;
/**
* This feature fires for rule ids.
@@ -59,14 +59,14 @@ public class RuleFF extends StatelessFF {
private final Cache<Rule, Integer> featureCache;
- public RuleFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, NAME, args, config);
+ public RuleFF(Config featureConfig, FeatureVector weights) {
+ super(NAME, featureConfig, weights);
- ownerRestriction = (parsedArgs.containsKey("owner"));
- owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : UNKNOWN_OWNER_ID;
+ ownerRestriction = featureConfig.hasPath("owner");
+ owner = ownerRestriction ? OwnerMap.register(featureConfig.getString("owner")) : UNKNOWN_OWNER_ID;
- if (parsedArgs.containsKey("sides")) {
- final String sideValue = parsedArgs.get("sides");
+ if (featureConfig.hasPath("sides")) {
+ final String sideValue = featureConfig.getString("sides");
if (sideValue.equalsIgnoreCase("source")) {
sides = Sides.SOURCE;
} else if (sideValue.equalsIgnoreCase("target")) {
@@ -81,11 +81,7 @@ public class RuleFF extends StatelessFF {
}
// initialize cache
- if (parsedArgs.containsKey("cacheSize")) {
- featureCache = newBuilder().maximumSize(Integer.parseInt(parsedArgs.get("cacheSize"))).build();
- } else {
- featureCache = newBuilder().maximumSize(config.cachedRuleSize).build();
- }
+ featureCache = newBuilder().maximumSize(featureConfig.getInt("cacheSize")).build();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
index f6fcefe..b230ce6 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
@@ -22,13 +22,14 @@ import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/*
* This feature computes three feature templates: a feature indicating the length of the rule's
* source side, its target side, and a feature that pairs them.
@@ -37,8 +38,8 @@ public abstract class RuleLength extends StatelessFF {
private static final int VALUE = 1;
- public RuleLength(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "RuleLength", args, config);
+ public RuleLength(Config featureConfig, FeatureVector weights) {
+ super("RuleLength", featureConfig, weights);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
index eb7bd50..9e36410 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
@@ -22,7 +22,6 @@ import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
@@ -30,13 +29,15 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.FormatUtils;
+import com.typesafe.config.Config;
+
/*
* Implements the RuleShape feature for source, target, and paired source+target sides.
*/
public class RuleShape extends StatelessFF {
- public RuleShape(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "RuleShape", args, config);
+ public RuleShape(Config featureConfig, FeatureVector weights) {
+ super("RuleShape", featureConfig, weights);
}
private enum WordType {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ScoreAccumulator.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ScoreAccumulator.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ScoreAccumulator.java
new file mode 100644
index 0000000..302b15d
--- /dev/null
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/ScoreAccumulator.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff;
+/**
+ * An {@link Accumulator} that reduces feature values to a single scalar: every added value is
+ * multiplied by the weight looked up for its feature id and summed into a running score.
+ */
+public class ScoreAccumulator implements Accumulator {
+ private float score; // running sum of value * weight over all add() calls
+ private final FeatureVector weights; // queried by feature id on every add() call
+ /**
+ * Creates an accumulator whose score starts at zero.
+ *
+ * @param weights weight vector queried by feature id whenever a value is added
+ */
+ public ScoreAccumulator(FeatureVector weights) {
+ this.score = 0.0f;
+ this.weights = weights;
+ }
+ /**
+ * Adds {@code value * weights.getOrDefault(featureId)} to the running score.
+ *
+ * @param featureId id used to look up the weight in the weight vector
+ * @param value feature value to be scaled by that weight
+ */
+ @Override
+ public void add(int featureId, float value) {
+ score += value * weights.getOrDefault(featureId);
+ }
+ /**
+ * @return the weighted sum accumulated so far
+ */
+ public float getScore() {
+ return score;
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index 1d0e6e7..0b18537 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -20,13 +20,14 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* This feature returns the scored path through the source lattice, which is recorded in a
* SourcePath object.
@@ -39,8 +40,8 @@ public final class SourcePathFF extends StatelessFF {
/*
* This is a single-value feature template, so we cache the weight here.
*/
- public SourcePathFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "SourcePath", args, config);
+ public SourcePathFF(Config featureConfig, FeatureVector weights) {
+ super("SourcePath", featureConfig, weights);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
index 1f5d0ed..8071763 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatefulFF.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
@@ -29,6 +28,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.typesafe.config.Config;
+
/**
* Stateful features contribute dynamic programming state. Unlike earlier versions of Joshua, the
* stateful feature itself is responsible for computing and returning its updated state. Each
@@ -48,10 +49,10 @@ public abstract class StatefulFF extends FeatureFunction {
/* This records the state index for each instantiated stateful feature function. */
protected int stateIndex = 0;
- public StatefulFF(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
- super(weights, name, args, config);
+ public StatefulFF(final String name, Config featureConfig, FeatureVector weights) {
+ super(name, featureConfig, weights);
- LOG.info("Stateful object with state index {}", GLOBAL_STATE_INDEX);
+ LOG.info("Stateful object with state index {}", GLOBAL_STATE_INDEX);
stateIndex = GLOBAL_STATE_INDEX++;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
index e473c37..ed6bc46 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/StatelessFF.java
@@ -20,13 +20,14 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
* Stateless feature functions do not contribute any state. You need not implement this class to
* create a stateless feature function, but it provides a few convenience functions.
@@ -37,8 +38,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
public abstract class StatelessFF extends FeatureFunction {
- public StatelessFF(FeatureVector weights, String name, String[] args, JoshuaConfiguration config) {
- super(weights, name, args, config);
+ public StatelessFF(final String name, Config featureConfig, FeatureVector weights) {
+ super(name, featureConfig, weights);
}
public final boolean isStateful() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index d9b894c..723363b 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -26,7 +26,6 @@ import java.util.LinkedList;
import java.util.List;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.state_maintenance.NgramDPState;
@@ -36,6 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.util.FormatUtils;
import org.apache.joshua.util.io.LineReader;
+import com.typesafe.config.Config;
+
/***
* The TargetBigram feature is an indicator feature that counts target word bigrams that are created when
* a rule is applied. It accepts three parameters:
@@ -59,17 +60,17 @@ public class TargetBigram extends StatefulFF {
private int maxTerms = 1000000;
private int threshold = 0;
- public TargetBigram(FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "TargetBigram", args, config);
+ public TargetBigram(Config featureConfig, FeatureVector weights) {
+ super("TargetBigram", featureConfig, weights);
- if (parsedArgs.containsKey("threshold"))
- threshold = Integer.parseInt(parsedArgs.get("threshold"));
+ if (featureConfig.hasPath("threshold"))
+ threshold = featureConfig.getInt("threshold");
- if (parsedArgs.containsKey("top-n"))
- maxTerms = Integer.parseInt(parsedArgs.get("top-n"));
+ if (featureConfig.hasPath("top-n"))
+ maxTerms = featureConfig.getInt("top-n");
- if (parsedArgs.containsKey("vocab")) {
- loadVocab(parsedArgs.get("vocab"));
+ if (featureConfig.hasPath("vocab")) {
+ loadVocab(featureConfig.getString("vocab"));
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f2edda0f/joshua-core/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index c5c4a14..5e8d4e3 100644
--- a/joshua-core/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/joshua-core/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder.ff;
import java.util.List;
-import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.chart_parser.SourcePath;
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
@@ -28,6 +27,8 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
import org.apache.joshua.decoder.phrase.Hypothesis;
import org.apache.joshua.decoder.segment_file.Sentence;
+import com.typesafe.config.Config;
+
/**
*
* @author Zhifei Li zhifei.work@gmail.com
@@ -38,13 +39,15 @@ public final class WordPenalty extends StatelessFF {
private float OMEGA = -(float) Math.log10(Math.E); // -0.435
private final boolean isCky;
- public WordPenalty(final FeatureVector weights, String[] args, JoshuaConfiguration config) {
- super(weights, "WordPenalty", args, config);
+ public WordPenalty(Config featureConfig, FeatureVector weights) {
+ super("WordPenalty", featureConfig, weights);
- if (parsedArgs.containsKey("value"))
- OMEGA = Float.parseFloat(parsedArgs.get("value"));
-
- isCky = config.search_algorithm.equals("cky");
+ if (featureConfig.hasPath("value")) {
+ OMEGA = (float) featureConfig.getDouble("value");
+ }
+
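+ // TODO(fhieber): isCky is hard-coded to true for now; restore the search_algorithm "cky" check (commented out below) once that setting can be read here.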
+ isCky = true; //decoderConfig.getConfig().getString("search_algorithm").equals("cky");
}
@Override