You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/29 17:42:43 UTC

[07/10] incubator-joshua git commit: Renamed DecoderThread to DecoderTask

Renamed DecoderThread to DecoderTask


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/0bb29329
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/0bb29329
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/0bb29329

Branch: refs/heads/JOSHUA-304
Commit: 0bb293295e3670c7449815941566578facd247e9
Parents: d1c9c07
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Mon Aug 29 13:53:53 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon Aug 29 13:53:53 2016 +0200

----------------------------------------------------------------------
 .../java/org/apache/joshua/decoder/Decoder.java |   6 +-
 .../org/apache/joshua/decoder/DecoderTask.java  | 197 +++++++++++++++++++
 .../apache/joshua/decoder/DecoderThread.java    | 197 -------------------
 .../apache/joshua/decoder/JoshuaDecoder.java    |   2 +-
 .../org/apache/joshua/decoder/Translation.java  |   2 +-
 .../joshua/decoder/chart_parser/Chart.java      |   2 +-
 6 files changed, 203 insertions(+), 203 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index bc64fda..c7b2168 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -76,7 +76,7 @@ import org.slf4j.LoggerFactory;
  * but also ensures that round-robin parallelization occurs, since RequestParallelizer uses the
  * thread pool before translating each request.
  *
- * A decoding thread is handled by DecoderThread and launched from DecoderThreadRunner. The purpose
+ * A decoding thread is handled by DecoderTask and launched from DecoderThreadRunner. The purpose
  * of the runner is to record where to place the translated sentence when it is done (i.e., which
  * Translations object). Translations itself is an iterator whose next() call blocks until the next
  * translation is available.
@@ -223,8 +223,8 @@ public class Decoder {
    */
   public Translation decode(Sentence sentence) {
     try {
-      DecoderThread decoderThread = new DecoderThread(this.grammars, Decoder.weights, this.featureFunctions, joshuaConfiguration);
-      return decoderThread.translate(sentence);
+      DecoderTask decoderTask = new DecoderTask(this.grammars, Decoder.weights, this.featureFunctions, joshuaConfiguration);
+      return decoderTask.translate(sentence);
     } catch (IOException e) {
       throw new RuntimeException(String.format(
               "Input %d: FATAL UNCAUGHT EXCEPTION: %s", sentence.id(), e.getMessage()), e);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/DecoderTask.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderTask.java b/src/main/java/org/apache/joshua/decoder/DecoderTask.java
new file mode 100644
index 0000000..e6ce331
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/DecoderTask.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.decoder.chart_parser.Chart;
+import org.apache.joshua.decoder.ff.FeatureFunction;
+import org.apache.joshua.decoder.ff.FeatureVector;
+import org.apache.joshua.decoder.ff.SourceDependentFF;
+import org.apache.joshua.decoder.ff.tm.Grammar;
+import org.apache.joshua.decoder.hypergraph.ForestWalker;
+import org.apache.joshua.decoder.hypergraph.GrammarBuilderWalkerFunction;
+import org.apache.joshua.decoder.hypergraph.HyperGraph;
+import org.apache.joshua.decoder.phrase.Stacks;
+import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.corpus.Vocabulary;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class handles decoding of individual Sentence objects (which can represent plain sentences
+ * or lattices). A single sentence can be decoded by a call to translate() and, if an InputHandler
+ * is used, many sentences can be decoded in a thread-safe manner via a single call to
+ * translateAll(), which continually queries the InputHandler for sentences until they have all been
+ * consumed and translated.
+ * 
+ * The DecoderFactory class is responsible for launching the threads.
+ * 
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ */
+
+public class DecoderTask {
+  private static final Logger LOG = LoggerFactory.getLogger(DecoderTask.class);
+
+  private final JoshuaConfiguration joshuaConfiguration;
+  /*
+   * these variables may be the same across all threads (e.g., just copy from DecoderFactory), or
+   * differ from thread to thread
+   */
+  private final List<Grammar> allGrammars;
+  private final List<FeatureFunction> featureFunctions;
+
+
+  // ===============================================================
+  // Constructor
+  // ===============================================================
+  //TODO: (kellens) why is weights unused?
+  public DecoderTask(List<Grammar> grammars, FeatureVector weights,
+                     List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) throws IOException {
+
+    this.joshuaConfiguration = joshuaConfiguration;
+    this.allGrammars = grammars;
+
+    this.featureFunctions = new ArrayList<>();
+    for (FeatureFunction ff : featureFunctions) {
+      if (ff instanceof SourceDependentFF) {
+        this.featureFunctions.add(((SourceDependentFF) ff).clone());
+      } else {
+        this.featureFunctions.add(ff);
+      }
+    }
+  }
+
+  // ===============================================================
+  // Methods
+  // ===============================================================
+
+  /**
+   * Translate a sentence.
+   * 
+   * @param sentence The sentence to be translated.
+   * @return the sentence {@link org.apache.joshua.decoder.Translation}
+   */
+  public Translation translate(Sentence sentence) {
+
+    LOG.info("Input {}: {}", sentence.id(), sentence.fullSource());
+
+    if (sentence.target() != null)
+      LOG.info("Input {}: Constraining to target sentence '{}'",
+          sentence.id(), sentence.target());
+
+    // skip blank sentences
+    if (sentence.isEmpty()) {
+      LOG.info("Translation {}: Translation took 0 seconds", sentence.id());
+      return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
+    }
+
+    long startTime = System.currentTimeMillis();
+
+    int numGrammars = allGrammars.size();
+    Grammar[] grammars = new Grammar[numGrammars];
+
+    for (int i = 0; i < allGrammars.size(); i++)
+      grammars[i] = allGrammars.get(i);
+
+    if (joshuaConfiguration.segment_oovs)
+      sentence.segmentOOVs(grammars);
+
+    /**
+     * Joshua supports (as of September 2014) both phrase-based and hierarchical decoding. Here
+     * we build the appropriate chart. The output of both systems is a hypergraph, which is then
+     * used for further processing (e.g., k-best extraction).
+     */
+    HyperGraph hypergraph = null;
+    try {
+
+      if (joshuaConfiguration.search_algorithm.equals("stack")) {
+        Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
+
+        hypergraph = stacks.search();
+      } else {
+        /* Seeding: the chart only sees the grammars, not the factories */
+        Chart chart = new Chart(sentence, this.featureFunctions, grammars,
+            joshuaConfiguration.goal_symbol, joshuaConfiguration);
+
+        hypergraph = (joshuaConfiguration.use_dot_chart) 
+            ? chart.expand() 
+                : chart.expandSansDotChart();
+      }
+
+    } catch (java.lang.OutOfMemoryError e) {
+      LOG.error("Input {}: out of memory", sentence.id());
+      hypergraph = null;
+    }
+
+    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
+    LOG.info("Input {}: Translation took {} seconds", sentence.id(), seconds);
+    LOG.info("Input {}: Memory used is {} MB", sentence.id(), (Runtime
+        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
+
+    /* Return the translation unless we're doing synchronous parsing. */
+    if (!joshuaConfiguration.parse || hypergraph == null) {
+      return new Translation(sentence, hypergraph, featureFunctions, joshuaConfiguration);
+    }
+
+    /*****************************************************************************************/
+
+    /*
+     * Synchronous parsing.
+     * 
+     * Step 1. Traverse the hypergraph to create a grammar for the second-pass parse.
+     */
+    Grammar newGrammar = getGrammarFromHyperGraph(joshuaConfiguration.goal_symbol, hypergraph);
+    newGrammar.sortGrammar(this.featureFunctions);
+    long sortTime = System.currentTimeMillis();
+    LOG.info("Sentence {}: New grammar has {} rules.", sentence.id(),
+        newGrammar.getNumRules());
+
+    /* Step 2. Create a new chart and parse with the instantiated grammar. */
+    Grammar[] newGrammarArray = new Grammar[] { newGrammar };
+    Sentence targetSentence = new Sentence(sentence.target(), sentence.id(), joshuaConfiguration);
+    Chart chart = new Chart(targetSentence, featureFunctions, newGrammarArray, "GOAL",joshuaConfiguration);
+    int goalSymbol = GrammarBuilderWalkerFunction.goalSymbol(hypergraph);
+    String goalSymbolString = Vocabulary.word(goalSymbol);
+    LOG.info("Sentence {}: goal symbol is {} ({}).", sentence.id(),
+        goalSymbolString, goalSymbol);
+    chart.setGoalSymbolID(goalSymbol);
+
+    /* Parsing */
+    HyperGraph englishParse = chart.expand();
+    long secondParseTime = System.currentTimeMillis();
+    LOG.info("Sentence {}: Finished second chart expansion ({} seconds).",
+        sentence.id(), (secondParseTime - sortTime) / 1000);
+    LOG.info("Sentence {} total time: {} seconds.\n", sentence.id(),
+        (secondParseTime - startTime) / 1000);
+    LOG.info("Memory used after sentence {} is {} MB", sentence.id(), (Runtime
+        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
+    return new Translation(sentence, englishParse, featureFunctions, joshuaConfiguration); // or do something else
+  }
+
+  private Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
+    GrammarBuilderWalkerFunction f = new GrammarBuilderWalkerFunction(goal,joshuaConfiguration);
+    ForestWalker walker = new ForestWalker();
+    walker.walk(hg.goalNode, f);
+    return f.getGrammar();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
deleted file mode 100644
index a6f39b1..0000000
--- a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.joshua.decoder.chart_parser.Chart;
-import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.ff.SourceDependentFF;
-import org.apache.joshua.decoder.ff.tm.Grammar;
-import org.apache.joshua.decoder.hypergraph.ForestWalker;
-import org.apache.joshua.decoder.hypergraph.GrammarBuilderWalkerFunction;
-import org.apache.joshua.decoder.hypergraph.HyperGraph;
-import org.apache.joshua.decoder.phrase.Stacks;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.apache.joshua.corpus.Vocabulary;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class handles decoding of individual Sentence objects (which can represent plain sentences
- * or lattices). A single sentence can be decoded by a call to translate() and, if an InputHandler
- * is used, many sentences can be decoded in a thread-safe manner via a single call to
- * translateAll(), which continually queries the InputHandler for sentences until they have all been
- * consumed and translated.
- * 
- * The DecoderFactory class is responsible for launching the threads.
- * 
- * @author Matt Post post@cs.jhu.edu
- * @author Zhifei Li, zhifei.work@gmail.com
- */
-
-public class DecoderThread {
-  private static final Logger LOG = LoggerFactory.getLogger(DecoderThread.class);
-
-  private final JoshuaConfiguration joshuaConfiguration;
-  /*
-   * these variables may be the same across all threads (e.g., just copy from DecoderFactory), or
-   * differ from thread to thread
-   */
-  private final List<Grammar> allGrammars;
-  private final List<FeatureFunction> featureFunctions;
-
-
-  // ===============================================================
-  // Constructor
-  // ===============================================================
-  //TODO: (kellens) why is weights unused?
-  public DecoderThread(List<Grammar> grammars, FeatureVector weights,
-                       List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) throws IOException {
-
-    this.joshuaConfiguration = joshuaConfiguration;
-    this.allGrammars = grammars;
-
-    this.featureFunctions = new ArrayList<>();
-    for (FeatureFunction ff : featureFunctions) {
-      if (ff instanceof SourceDependentFF) {
-        this.featureFunctions.add(((SourceDependentFF) ff).clone());
-      } else {
-        this.featureFunctions.add(ff);
-      }
-    }
-  }
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-  /**
-   * Translate a sentence.
-   * 
-   * @param sentence The sentence to be translated.
-   * @return the sentence {@link org.apache.joshua.decoder.Translation}
-   */
-  public Translation translate(Sentence sentence) {
-
-    LOG.info("Input {}: {}", sentence.id(), sentence.fullSource());
-
-    if (sentence.target() != null)
-      LOG.info("Input {}: Constraining to target sentence '{}'",
-          sentence.id(), sentence.target());
-
-    // skip blank sentences
-    if (sentence.isEmpty()) {
-      LOG.info("Translation {}: Translation took 0 seconds", sentence.id());
-      return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
-    }
-
-    long startTime = System.currentTimeMillis();
-
-    int numGrammars = allGrammars.size();
-    Grammar[] grammars = new Grammar[numGrammars];
-
-    for (int i = 0; i < allGrammars.size(); i++)
-      grammars[i] = allGrammars.get(i);
-
-    if (joshuaConfiguration.segment_oovs)
-      sentence.segmentOOVs(grammars);
-
-    /**
-     * Joshua supports (as of September 2014) both phrase-based and hierarchical decoding. Here
-     * we build the appropriate chart. The output of both systems is a hypergraph, which is then
-     * used for further processing (e.g., k-best extraction).
-     */
-    HyperGraph hypergraph = null;
-    try {
-
-      if (joshuaConfiguration.search_algorithm.equals("stack")) {
-        Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
-
-        hypergraph = stacks.search();
-      } else {
-        /* Seeding: the chart only sees the grammars, not the factories */
-        Chart chart = new Chart(sentence, this.featureFunctions, grammars,
-            joshuaConfiguration.goal_symbol, joshuaConfiguration);
-
-        hypergraph = (joshuaConfiguration.use_dot_chart) 
-            ? chart.expand() 
-                : chart.expandSansDotChart();
-      }
-
-    } catch (java.lang.OutOfMemoryError e) {
-      LOG.error("Input {}: out of memory", sentence.id());
-      hypergraph = null;
-    }
-
-    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
-    LOG.info("Input {}: Translation took {} seconds", sentence.id(), seconds);
-    LOG.info("Input {}: Memory used is {} MB", sentence.id(), (Runtime
-        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
-
-    /* Return the translation unless we're doing synchronous parsing. */
-    if (!joshuaConfiguration.parse || hypergraph == null) {
-      return new Translation(sentence, hypergraph, featureFunctions, joshuaConfiguration);
-    }
-
-    /*****************************************************************************************/
-
-    /*
-     * Synchronous parsing.
-     * 
-     * Step 1. Traverse the hypergraph to create a grammar for the second-pass parse.
-     */
-    Grammar newGrammar = getGrammarFromHyperGraph(joshuaConfiguration.goal_symbol, hypergraph);
-    newGrammar.sortGrammar(this.featureFunctions);
-    long sortTime = System.currentTimeMillis();
-    LOG.info("Sentence {}: New grammar has {} rules.", sentence.id(),
-        newGrammar.getNumRules());
-
-    /* Step 2. Create a new chart and parse with the instantiated grammar. */
-    Grammar[] newGrammarArray = new Grammar[] { newGrammar };
-    Sentence targetSentence = new Sentence(sentence.target(), sentence.id(), joshuaConfiguration);
-    Chart chart = new Chart(targetSentence, featureFunctions, newGrammarArray, "GOAL",joshuaConfiguration);
-    int goalSymbol = GrammarBuilderWalkerFunction.goalSymbol(hypergraph);
-    String goalSymbolString = Vocabulary.word(goalSymbol);
-    LOG.info("Sentence {}: goal symbol is {} ({}).", sentence.id(),
-        goalSymbolString, goalSymbol);
-    chart.setGoalSymbolID(goalSymbol);
-
-    /* Parsing */
-    HyperGraph englishParse = chart.expand();
-    long secondParseTime = System.currentTimeMillis();
-    LOG.info("Sentence {}: Finished second chart expansion ({} seconds).",
-        sentence.id(), (secondParseTime - sortTime) / 1000);
-    LOG.info("Sentence {} total time: {} seconds.\n", sentence.id(),
-        (secondParseTime - startTime) / 1000);
-    LOG.info("Memory used after sentence {} is {} MB", sentence.id(), (Runtime
-        .getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0);
-    return new Translation(sentence, englishParse, featureFunctions, joshuaConfiguration); // or do something else
-  }
-
-  private Grammar getGrammarFromHyperGraph(String goal, HyperGraph hg) {
-    GrammarBuilderWalkerFunction f = new GrammarBuilderWalkerFunction(goal,joshuaConfiguration);
-    ForestWalker walker = new ForestWalker();
-    walker.walk(hg.goalNode, f);
-    return f.getGrammar();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index 4c31655..b1b9d1e 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -39,7 +39,7 @@ import org.slf4j.LoggerFactory;
 
 /**
  * Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
- * and <code>DecoderThread</code>.
+ * and <code>DecoderTask</code>.
  * 
  * @author Zhifei Li, zhifei.work@gmail.com
  * @author wren ng thornton wren@users.sourceforge.net

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index 1688805..142ff05 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -44,7 +44,7 @@ import org.slf4j.LoggerFactory;
 /**
  * This class represents translated input objects (sentences or lattices). It is aware of the source
  * sentence and id and contains the decoded hypergraph. Translation objects are returned by
- * DecoderThread instances to the InputHandler, where they are assembled in order for output.
+ * DecoderTask instances to the InputHandler, where they are assembled in order for output.
  * 
  * @author Matt Post post@cs.jhu.edu
  * @author Felix Hieber fhieber@amazon.com

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/0bb29329/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 5c123f9..bd91a6f 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -108,7 +108,7 @@ public class Chart {
    * for the sentence() method, we should just accept a Segment instead of the
    * sentence, segmentID, and constraintSpans parameters. We have the symbol
    * table already, so we can do the integerization here instead of in
-   * DecoderThread. GrammarFactory.getGrammarForSentence will want the
+   * DecoderTask. GrammarFactory.getGrammarForSentence will want the
    * integerized sentence as well, but then we'll need to adjust that interface
    * to deal with (non-trivial) lattices too. Of course, we get passed the
    * grammars too so we could move all of that into here.