You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@joshua.apache.org by mj...@apache.org on 2016/11/17 18:25:06 UTC

[1/8] incubator-joshua git commit: formatting

Repository: incubator-joshua
Updated Branches:
  refs/heads/save_custom_grammars [created] 8b39f2eef


formatting


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/8b39f2ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/8b39f2ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/8b39f2ee

Branch: refs/heads/save_custom_grammars
Commit: 8b39f2eef3699abe84d08a576716f8c9baa97a9c
Parents: d142c1f
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 17 13:23:38 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../org/apache/joshua/server/ServerThread.java  | 24 +++++++++-----------
 1 file changed, 11 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/8b39f2ee/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 0eb8109..3a76e1b 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -168,7 +168,8 @@ public class ServerThread extends Thread implements HttpHandler {
       handleMetadata(meta, message);
 
     for (Translation translation: translationResponseStream) {
-      LOG.info("TRANSLATION: '{}' with {} k-best items", translation, translation.getStructuredTranslations().size());
+      LOG.info("TRANSLATION: '{}' with {} k-best items, score {}", 
+          translation, translation.getStructuredTranslations().size());
       message.addTranslation(translation);
     }
 
@@ -221,8 +222,7 @@ public class ServerThread extends Thread implements HttpHandler {
       break;
     }
     case "add_rule": {
-      
-      try {
+    
       String argTokens[] = args.split(" \\|\\|\\| ");
 
       if (argTokens.length < 3) {
@@ -236,25 +236,25 @@ public class ServerThread extends Thread implements HttpHandler {
       String featureStr = "";
       String alignmentStr = "";
       if (argTokens.length > 3)
-        featureStr = argTokens[3];
+        featureStr = argTokens[3].trim();
       if (argTokens.length > 4)
-        alignmentStr = " ||| " + argTokens[4];
+        alignmentStr = argTokens[4].trim();
 
       /* Prepend source and target side nonterminals for phrase-based decoding. Probably better
        * handled in each grammar type's addRule() function.
        */
-      String ruleString = String.format("%s ||| %s ||| %s ||| -1 %s %s", 
-          lhs, source, target, featureStr, alignmentStr);
+      String ruleString = String.format("%s ||| %s ||| %s ||| -1", lhs, source, target);
+      if (! featureStr.equals(""))
+        ruleString += featureStr;
+      if (! alignmentStr.equals(""))
+        ruleString += " ||| " + alignmentStr;
+
       Rule rule = new HieroFormatReader().parseLine(ruleString);
       decoder.addCustomRule(rule);
 
       LOG.info("Added custom rule {}", rule.toString());
 
       break;
-      
-      } catch (Exception e) {
-        e.printStackTrace();
-      }
     }
     case "list_rules": {
 
@@ -285,8 +285,6 @@ public class ServerThread extends Thread implements HttpHandler {
 
       Rule rule = new HieroFormatReader().parseLine(args);
 
-      LOG.info("remove_rule " + rule);
-
       Trie trie = decoder.getCustomPhraseTable().getTrieRoot();
       int[] sourceTokens = rule.getFrench();
       for (int sourceToken : sourceTokens) {

[2/8] incubator-joshua git commit: bugfix in loading phrase-based grammars

Posted by mj...@apache.org.

bugfix in loading phrase-based grammars


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9fcbfa16
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9fcbfa16
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9fcbfa16

Branch: refs/heads/save_custom_grammars
Commit: 9fcbfa16e8d9888286f43c88a2177108ded6900b
Parents: 75f5b9a
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 17 09:39:56 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/decoder/Decoder.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9fcbfa16/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index d93cfe5..1df1568 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -453,7 +453,7 @@ public class Decoder {
       String path = parsedArgs.get("path");
       
       Grammar grammar;
-      if (type.equals("moses") && ! type.equals("phrase")) {
+      if (type.equals("moses") || type.equals("phrase")) {
         joshuaConfiguration.search_algorithm = "stack";
         grammar = new PhraseTable(path, owner, type, joshuaConfiguration);

[4/8] incubator-joshua git commit: changed logic of glue grammars and custom grammar

Posted by mj...@apache.org.

changed logic of glue grammars and custom grammar

Previously, custom grammars were always created. Now, they are only instantiated if a grammar with the owner "custom" is found in the config file. If the user tries to add a rule when no custom grammar was loaded, a warning is logged explaining how to enable one.

A glue grammar is created inline if no grammar owned by "glue" was found. Previously, one was created only if no grammars were supplied at all. This new method is more robust. It will be nice in version 7 to get rid of the separate PhraseTable, consolidating it with MemoryBasedBatchGrammar. Phrase-based decoding doesn't have to worry about glue rules being present, since the glue rules for phrase-based decoding are hard-coded as part of the search algorithm instead of being treated like generic rules (as in CKY).


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ef07c532
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ef07c532
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ef07c532

Branch: refs/heads/save_custom_grammars
Commit: ef07c5326f737248ad494f401dff35d3414e0f56
Parents: 2229d1b
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Nov 16 21:00:36 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../java/org/apache/joshua/decoder/Decoder.java | 104 ++++++++++---------
 1 file changed, 56 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef07c532/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index ca2bfaa..84824a5 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -435,64 +435,72 @@ public class Decoder {
    */
   private void initializeTranslationGrammars() throws IOException {
 
-    if (joshuaConfiguration.tms.size() > 0) {
-
-      // collect packedGrammars to check if they use a shared vocabulary
-      final List<PackedGrammar> packed_grammars = new ArrayList<>();
-
-      // tm = {thrax/hiero,packed,samt,moses} OWNER LIMIT FILE
-      for (String tmLine : joshuaConfiguration.tms) {
-
-        String type = tmLine.substring(0,  tmLine.indexOf(' '));
-        String[] args = tmLine.substring(tmLine.indexOf(' ')).trim().split("\\s+");
-        HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
-
-        String owner = parsedArgs.get("owner");
-        int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
-        String path = parsedArgs.get("path");
-
-        Grammar grammar;
-        if (! type.equals("moses") && ! type.equals("phrase")) {
-          if (new File(path).isDirectory()) {
-            try {
-              PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
-              packed_grammars.add(packed_grammar);
-              grammar = packed_grammar;
-            } catch (FileNotFoundException e) {
-              String msg = String.format("Couldn't load packed grammar from '%s'", path)
-                  + "Perhaps it doesn't exist, or it may be an old packed file format.";
-              throw new RuntimeException(msg);
-            }
-          } else {
-            // thrax, hiero, samt
-            grammar = new MemoryBasedBatchGrammar(type, path, owner,
-                joshuaConfiguration.default_non_terminal, span_limit, joshuaConfiguration);
-          }
+    // collect packedGrammars to check if they use a shared vocabulary
+    final List<PackedGrammar> packed_grammars = new ArrayList<>();
+    
+    // record the glue grammar so we can make sure there is one
+    Grammar glueGrammar = null;
 
-        } else {
+    // tm = {thrax/hiero,packed,samt,moses} OWNER LIMIT FILE
+    for (String tmLine : joshuaConfiguration.tms) {
+
+      String type = tmLine.substring(0,  tmLine.indexOf(' '));
+      String[] args = tmLine.substring(tmLine.indexOf(' ')).trim().split("\\s+");
+      HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
 
-          joshuaConfiguration.search_algorithm = "stack";
-          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
+      String owner = parsedArgs.get("owner");
+      int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
+      String path = parsedArgs.get("path");
+      
+      Grammar grammar;
+      if (type.equals("moses") && ! type.equals("phrase")) {
+        joshuaConfiguration.search_algorithm = "stack";
+        grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
+
+      } else {
+        if (new File(path).isDirectory()) {
+          try {
+            PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
+            packed_grammars.add(packed_grammar);
+            grammar = packed_grammar;
+          } catch (FileNotFoundException e) {
+            String msg = String.format("Couldn't load packed grammar from '%s'", path)
+                + "Perhaps it doesn't exist, or it may be an old packed file format.";
+            throw new RuntimeException(msg);
+          }
+        } else {
+          // thrax, hiero, samt
+          grammar = new MemoryBasedBatchGrammar(type, path, owner,
+              joshuaConfiguration.default_non_terminal, span_limit, joshuaConfiguration);
         }
+      }
+
+      this.grammars.add(grammar);
 
-        this.grammars.add(grammar);
+      /* Record whether we saw a custom grammar for adding phrase entries */
+      if (getOwner(grammar.getOwner()).equals("custom")) {
+        this.customPhraseTable = grammar;
+      } else if (getOwner(grammar.getOwner()).equals("glue")) {
+        glueGrammar = grammar;
       }
+    }
 
-      checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
+    checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
 
-    } else {
-      LOG.warn("no grammars supplied!  Supplying dummy glue grammar.");
-      MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration, -1);
-      glueGrammar.addGlueRules(featureFunctions);
+    /* Create a glue grammar if none was provided */
+    if (joshuaConfiguration.search_algorithm.equals("cky") && glueGrammar == null) {
+      LOG.warn("No glue grammar found! Creating dummy glue grammar.");
+      glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration, -1);
+      ((MemoryBasedBatchGrammar)glueGrammar).addGlueRules(featureFunctions);
       this.grammars.add(glueGrammar);
     }
+    
+    /* Create a custom phrase table if none was found in the config file */
+    if (customPhraseTable == null) {
+      this.customPhraseTable = new PhraseTable("custom.grammar", "custom", "phrase", joshuaConfiguration);
+      this.grammars.add(this.customPhraseTable);
+    }
 
-    /* Add the grammar for custom entries */
-    if (joshuaConfiguration.search_algorithm.equals("stack"))
-      this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
-    else
-      this.customPhraseTable = new MemoryBasedBatchGrammar("custom", joshuaConfiguration, 20);
-    this.grammars.add(this.customPhraseTable);
 
     /* Create an epsilon-deleting grammar */
     if (joshuaConfiguration.lattice_decoding) {

[7/8] incubator-joshua git commit: custom phrase table now saved on deletion

Posted by mj...@apache.org.

custom phrase table now saved on deletion


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d142c1f0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d142c1f0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d142c1f0

Branch: refs/heads/save_custom_grammars
Commit: d142c1f0a951fd3c3a564d05c99b8a72c5579e7f
Parents: 9fcbfa1
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 17 13:23:32 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../java/org/apache/joshua/decoder/Decoder.java | 22 +++++++++++++-------
 .../org/apache/joshua/server/ServerThread.java  |  3 +++
 2 files changed, 17 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d142c1f0/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 1df1568..7b202b9 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -674,20 +674,26 @@ public class Decoder {
    * @param rule the rule to add
    */
   public void addCustomRule(Rule rule) {
+    if (getCustomPhraseTable() != null) {
+      getCustomPhraseTable().addRule(rule);
+      rule.estimateRuleCost(featureFunctions);
+      getCustomPhraseTable().save();
+    }
+  }
+
+  public Grammar getCustomPhraseTable() {
     if (customPhraseTable == null) {
-      LOG.warn("No custom grammar was found in the config file; can't add rule");
+      LOG.warn("No custom grammar was found in the config file, so none was instantiated");
       LOG.warn("Add the following line to your config and restart Joshua to enable it:");
       LOG.warn("  tm = phrase -owner custom -maxspan 20 -path /path/to/custom.grammar");
       LOG.warn("The owner must be 'custom'");
     }
-      
-    customPhraseTable.addRule(rule);
-    rule.estimateRuleCost(featureFunctions);
-    
-    customPhraseTable.save();
-  }
 
-  public Grammar getCustomPhraseTable() {
     return customPhraseTable;
   }
+  
+  public void saveCustomPhraseTable() {
+    if (getCustomPhraseTable() != null)
+      getCustomPhraseTable().save();
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d142c1f0/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 6cb4bf1..0eb8109 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -305,6 +305,9 @@ public class ServerThread extends Thread implements HttpHandler {
           }
         }
       }
+      
+      decoder.saveCustomPhraseTable();
+      
       break;
     }
     default: {

[8/8] incubator-joshua git commit: don't add a custom phrase table if it's not found in the config file

Posted by mj...@apache.org.

don't add a custom phrase table if it's not found in the config file


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/75f5b9a0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/75f5b9a0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/75f5b9a0

Branch: refs/heads/save_custom_grammars
Commit: 75f5b9a08974c49db8a2c9f5c0251013c6d5201a
Parents: 3e851fa
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 17 09:39:41 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 demo/README.md                                       | 11 +----------
 demo/demo.config                                     |  3 +++
 src/main/java/org/apache/joshua/decoder/Decoder.java |  7 -------
 3 files changed, 4 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/75f5b9a0/demo/README.md
----------------------------------------------------------------------
diff --git a/demo/README.md b/demo/README.md
index d086532..fcc826c 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -3,16 +3,7 @@ feed it sentences, add custom rules, and view the translations.
 
 There are two steps:
 
-1. Start Joshua in server mode
-
-       $JOSHUA/bin/joshua -server-type http -server-port 5674 \
-         -feature-function OOVPenalty \
-         -feature-function "PhrasePenalty -owner custom" \
-         -weight-overwrite "OOVPenalty 1 PhrasePenalty -1" \
-         -mark-oovs -lowercase -projectcase -output-format %S
-
-   Alternately, you can use the config file in this directory, which
-   contains all the above parameteres, and simply run it like this:
+1. Start Joshua in server mode, using the config file provided in this directory:
 
        $JOSHUA/bin/joshua -config demo.config
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/75f5b9a0/demo/demo.config
----------------------------------------------------------------------
diff --git a/demo/demo.config b/demo/demo.config
index 2a4c109..27050ac 100644
--- a/demo/demo.config
+++ b/demo/demo.config
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# The custom grammar, which allows adding custom phrase table entries
+tm = phrase -owner custom -maxspan 0 -path custom.grammar
+
 server-type = http
 server-port = 5674
 mark-oovs = true

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/75f5b9a0/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 84824a5..d93cfe5 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -495,13 +495,6 @@ public class Decoder {
       this.grammars.add(glueGrammar);
     }
     
-    /* Create a custom phrase table if none was found in the config file */
-    if (customPhraseTable == null) {
-      this.customPhraseTable = new PhraseTable("custom.grammar", "custom", "phrase", joshuaConfiguration);
-      this.grammars.add(this.customPhraseTable);
-    }
-
-
     /* Create an epsilon-deleting grammar */
     if (joshuaConfiguration.lattice_decoding) {
       LOG.info("Creating an epsilon-deleting grammar");

[3/8] incubator-joshua git commit: added saving of custom grammar

Posted by mj...@apache.org.

added saving of custom grammar


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2229d1b9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2229d1b9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2229d1b9

Branch: refs/heads/save_custom_grammars
Commit: 2229d1b9f4ed167507d721ff2cfe87ccfadf28a3
Parents: 81baa65
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Nov 16 20:57:53 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../java/org/apache/joshua/decoder/Decoder.java |  9 +++
 .../apache/joshua/decoder/ff/tm/Grammar.java    |  5 ++
 .../tm/hash_based/MemoryBasedBatchGrammar.java  | 73 +++++++++++++++++++-
 .../decoder/ff/tm/packed/PackedGrammar.java     |  5 ++
 .../joshua/decoder/phrase/PhraseTable.java      | 10 +++
 5 files changed, 99 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2229d1b9/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index c15898c..ca2bfaa 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -673,8 +673,17 @@ public class Decoder {
    * @param rule the rule to add
    */
   public void addCustomRule(Rule rule) {
+    if (customPhraseTable == null) {
+      LOG.warn("No custom grammar was found in the config file; can't add rule");
+      LOG.warn("Add the following line to your config and restart Joshua to enable it:");
+      LOG.warn("  tm = phrase -owner custom -maxspan 20 -path /path/to/custom.grammar");
+      LOG.warn("The owner must be 'custom'");
+    }
+      
     customPhraseTable.addRule(rule);
     rule.estimateRuleCost(featureFunctions);
+    
+    customPhraseTable.save();
   }
 
   public Grammar getCustomPhraseTable() {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2229d1b9/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
index 8f90d1b..27d3862 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
@@ -117,4 +117,9 @@ public interface Grammar {
    * @param rule the {@link org.apache.joshua.decoder.ff.tm.Rule}
    */
   void addRule(Rule rule);
+  
+  /**
+   * Write the grammar out to some permanent location (disk, database, etc).
+   */
+  void save();
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2229d1b9/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index ebfa996..f53affa 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -18,7 +18,16 @@
  */
 package org.apache.joshua.decoder.ff.tm.hash_based;
 
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -69,15 +78,27 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
   private GrammarReader<Rule> modelReader;
 
   /**
-   * Constructor used by Decoder mostly. Default spanLimit of 20
+   * Constructor used by Decoder mostly.
    * @param owner the associated decoder-wide {@link org.apache.joshua.decoder.ff.tm.OwnerMap}
    * @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
    * @param spanLimit the maximum span of the input grammar rule(s) can be applied to.
    */
   public MemoryBasedBatchGrammar(String owner, JoshuaConfiguration config, int spanLimit) {
+    this(null, owner, config, spanLimit);
+  }
+  
+  /**
+   * Constructor used by Decoder for creating custom grammars.
+   * 
+   * @param file the file to load the grammar from
+   * @param owner the associated decoder-wide {@link org.apache.joshua.decoder.ff.tm.OwnerMap}
+   * @param config a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
+   * @param spanLimit the maximum span of the input grammar rule(s) can be applied to.
+   */
+  public MemoryBasedBatchGrammar(String file, String owner, JoshuaConfiguration config, int spanLimit) {
     super(owner, config, spanLimit);
   }
-
+  
   /**
    * Constructor to initialize a GrammarReader (unowned)
    * @param reader the GrammarReader used for storing ASCII line-based grammars on disk.
@@ -99,7 +120,11 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
     this.grammarFile = grammarFile;
 
     // ==== loading grammar
-    this.modelReader = createReader(formatKeyword, grammarFile);
+    try {
+      this.modelReader = createReader(formatKeyword, grammarFile);
+    } catch (IOException e) {
+      LOG.warn("Couldn't load a '{}' type grammar from file '{}'", formatKeyword, grammarFile);
+    }
     if (modelReader != null) {
       for (Rule rule : modelReader)
         if (rule != null) {
@@ -235,6 +260,48 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
   }
 
   /**
+   * Saves the grammar to the specified location.
+   */
+  @Override
+  public void save() {
+    
+    LOG.info("Saving custom grammar to file '{}'", grammarFile);
+    
+    try {
+      BufferedWriter out = new BufferedWriter(new OutputStreamWriter(
+          new FileOutputStream(grammarFile), "UTF-8"));
+
+      ArrayList<Trie> nodes = new ArrayList<Trie>();
+      nodes.add(root);
+      while (nodes.size() > 0) {
+        Trie trie = nodes.remove(0);
+        // find all rules at the current node, print them
+        if (trie.hasRules()) {
+          for (Rule rule: trie.getRuleCollection().getRules()) {
+            try {
+              LOG.info("  rule: {}", rule.textFormat());
+              out.write(rule.textFormat() + "\n");
+            } catch (IOException e) {
+              e.printStackTrace();
+              return;
+            }
+          }
+        }
+
+        // graph is acyclical so we shouldn't have to check for having visited
+        if (trie.hasExtensions())
+          nodes.addAll(trie.getExtensions());
+      }
+      
+      out.close();
+    } catch (IOException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+      return;
+    }
+  }
+
+  /**
    * Adds a default set of glue rules.
    * 
    * @param featureFunctions an {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2229d1b9/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 914bdd2..c1ee160 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -1021,6 +1021,11 @@ public class PackedGrammar extends AbstractGrammar {
   public void addRule(Rule rule) {
     throw new RuntimeException("PackedGrammar.addRule(): I can't add rules");
   }
+  
+  @Override
+  public void save() {
+    throw new RuntimeException("PackedGrammar.save(): I can't be saved");
+  }
 
   /**
    * Read the config file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2229d1b9/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index de11f5f..74d2a8f 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -19,7 +19,9 @@
 package org.apache.joshua.decoder.phrase;
 
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
 import java.util.List;
 
 import org.apache.joshua.corpus.Vocabulary;
@@ -117,6 +119,14 @@ public class PhraseTable implements Grammar {
     backend.addRule(rule);
   }
   
+  /**
+   * Saves the grammar to disk. Only supported when the backend is a MemoryBasedBatchGrammar.
+   */
+  @Override
+  public void save() {
+    backend.save();
+  }
+  
   @Override
   public void addOOVRules(int sourceWord, List<FeatureFunction> featureFunctions) {
     // TODO: _OOV shouldn't be outright added, since the word might not be OOV for the LM (but now almost

[5/8] incubator-joshua git commit: cleanup in log messages, typos

Posted by mj...@apache.org.

cleanup in log messages, typos


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/81baa658
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/81baa658
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/81baa658

Branch: refs/heads/save_custom_grammars
Commit: 81baa658207f53623f2b07a5ae7d6a6e6ef0bbd8
Parents: 561799a
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Nov 16 06:41:03 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../joshua/decoder/chart_parser/Chart.java      |   4 +-
 .../decoder/ff/tm/SentenceFilteredGrammar.java  | 366 -------------------
 .../tm/hash_based/MemoryBasedBatchGrammar.java  |  10 +-
 .../joshua/decoder/segment_file/Token.java      |   2 +-
 src/test/resources/decoder/n-ary/joshua.config  |   2 -
 5 files changed, 6 insertions(+), 378 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81baa658/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index bd91a6f..883e20d 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -424,7 +424,7 @@ public class Chart {
     if (null == this.cells.get(0, sourceLength)
         || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
             this.sourceLength)) {
-      LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
+      LOG.warn("Input {}: Parse failure (either no derivations exist, or pruning is too aggressive)",
           sentence.id());
       return null;
     }
@@ -621,7 +621,7 @@ public class Chart {
     if (null == this.cells.get(0, sourceLength)
         || !this.goalBin.transitToGoal(this.cells.get(0, sourceLength), this.featureFunctions,
             this.sourceLength)) {
-      LOG.warn("Input {}: Parse failure (either no derivations exist or pruning is too aggressive",
+      LOG.warn("Input {}: Parse failure (either no derivations exist, or pruning is too aggressive)",
           sentence.id());
       return null;
     }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81baa658/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
deleted file mode 100644
index 4f545b7..0000000
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *  http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.joshua.decoder.ff.tm;
-
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map.Entry;
-
-import org.apache.joshua.decoder.ff.tm.hash_based.ExtensionIterator;
-import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
-import org.apache.joshua.decoder.segment_file.Sentence;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class implements dynamic sentence-level filtering. This is accomplished with a parallel
- * trie, a subset of the original trie, that only contains trie paths that are reachable from
- * traversals of the current sentence.
- * 
- * @author Matt Post post@cs.jhu.edu
- */
-public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
-
-  private static final Logger LOG = LoggerFactory.getLogger(SentenceFilteredGrammar.class);
-
-  private final AbstractGrammar baseGrammar;
-  private final SentenceFilteredTrie filteredTrie;
-  private final int[] tokens;
-  private final Sentence sentence;
-
-  /**
-   * Construct a new sentence-filtered grammar. The main work is done in the enclosed trie (obtained
-   * from the base grammar, which contains the complete grammar).
-   * 
-   * @param baseGrammar a new {@link org.apache.joshua.decoder.ff.tm.AbstractGrammar} to populate
-   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
-   */
-  SentenceFilteredGrammar(AbstractGrammar baseGrammar, Sentence sentence) {
-    super(OwnerMap.getOwner(baseGrammar.getOwner()), baseGrammar.joshuaConfiguration, baseGrammar.getSpanLimit());
-    this.baseGrammar = baseGrammar;
-    this.sentence = sentence;
-    this.tokens = sentence.getWordIDs();
-
-    int origCount = getNumRules(baseGrammar.getTrieRoot());
-    long startTime = System.currentTimeMillis();
-
-    /* Filter the rules; returns non-null object */
-    this.filteredTrie = filter(baseGrammar.getTrieRoot());
-    int filteredCount = getNumRules();
-
-    float seconds = (System.currentTimeMillis() - startTime) / 1000.0f;
-
-    LOG.debug("Sentence-level filtering of sentence {} ({} -> {} rules) in {} seconds",
-        sentence.id(), origCount, filteredCount, seconds);
-  }
-
-  @Override
-  public Trie getTrieRoot() {
-    return filteredTrie;
-  }
-
-  /**
-   * This function is poorly named: it doesn't mean whether a rule exists in the grammar for the
-   * current span, but whether the grammar is permitted to apply rules to the current span (a
-   * grammar-level parameter). As such we can just chain to the underlying grammar.
-   */
-  @Override
-  public boolean hasRuleForSpan(int startIndex, int endIndex, int pathLength) {
-    return baseGrammar.hasRuleForSpan(startIndex, endIndex, pathLength);
-  }
-
-  @Override
-  public int getNumRules() {
-    return getNumRules(getTrieRoot());
-  }
-
-  /**
-   * A convenience function that counts the number of rules in a grammar's trie.
-   * 
-   * @param node the {@link org.apache.joshua.decoder.ff.tm.Trie} implementation for which to count rules
-   * @return the number of rules
-   */
-  public int getNumRules(Trie node) {
-    int numRules = 0;
-    if (node != null) {
-      if (node.getRuleCollection() != null)
-        numRules += node.getRuleCollection().getRules().size();
-
-      if (node.getExtensions() != null)
-        for (Trie child : node.getExtensions())
-          numRules += getNumRules(child);
-    }
-
-    return numRules;
-  }
-
-  /**
-   * What is the algorithm?
-   * 
-   * Take the first word of the sentence, and start at the root of the trie. There are two things to
-   * consider: (a) word matches and (b) nonterminal matches.
-   * 
-   * For a word match, simply follow that arc along the trie. We create a parallel arc in our
-   * filtered grammar to represent it. Each arc in the filtered trie knows about its
-   * corresponding/underlying node in the unfiltered grammar trie.
-   * 
-   * A nonterminal is always permitted to match. The question then is how much of the input sentence
-   * we imagine it consumed. The answer is that it could have been any amount. So the recursive call
-   * has to be a set of calls, one each to the next trie node with different lengths of the sentence
-   * remaining.
-   * 
-   * A problem occurs when we have multiple sequential nonterminals. For scope-3 grammars, there can
-   * be four sequential nonterminals (in the case when they are grounded by terminals on both ends
-   * of the nonterminal chain). We'd like to avoid looking at all possible ways to split up the
-   * subsequence, because with respect to filtering rules, they are all the same.
-   * 
-   * We accomplish this with the following restriction: for purposes of grammar filtering, only the
-   * first in a sequence of nonterminal traversals can consume more than one word. Each of the
-   * subsequent ones would have to consume just one word. We then just have to record in the
-   * recursive call whether the last traversal was a nonterminal or not.
-   * 
-   * @param unfilteredTrieRoot todo
-   * @return the root of the filtered trie
-   */
-  private SentenceFilteredTrie filter(Trie unfilteredTrieRoot) {
-    SentenceFilteredTrie filteredTrieRoot = new SentenceFilteredTrie(unfilteredTrieRoot);
-
-    // System.err.println(String.format("FILTERING TO SENTENCE\n  %s\n",
-    // Vocabulary.getWords(tokens)));
-
-    /*
-     * The root of the trie is where rule applications start, so we simply try all possible
-     * positions in the sentence.
-     */
-    for (int i = 0; i < tokens.length; i++) {
-      filter(i, filteredTrieRoot, false);
-    }
-
-    return filteredTrieRoot;
-  }
-
-  /**
-   * Matches rules against the sentence. Intelligently handles chains of sequential nonterminals.
-   * Marks arcs that are traversable for this sentence.
-   * 
-   * @param i the position in the sentence to start matching
-   * @param trie the trie node to match against
-   * @param lastWasNT true if the match that brought us here was against a nonterminal
-   */
-  private void filter(int i, SentenceFilteredTrie trieNode, boolean lastWasNT) {
-    if (i >= tokens.length)
-      return;
-
-    /* Make sure the underlying unfiltered node has children. */
-    Trie unfilteredTrieNode = trieNode.unfilteredTrieNode;
-    if (unfilteredTrieNode.getChildren() == null) {
-      // trieNode.path.retreat();
-      return;
-    }
-
-    /* Match a word */
-    Trie trie = unfilteredTrieNode.match(tokens[i]);
-    if (trie != null) {
-      /*
-       * The current filtered node might already have an arc for this label. If so, retrieve it
-       * (since we still need to follow it); if not, create it.
-       */
-      SentenceFilteredTrie nextFilteredTrie = trieNode.match(tokens[i]);
-      if (nextFilteredTrie == null) {
-        nextFilteredTrie = new SentenceFilteredTrie(trie);
-        trieNode.children.put(tokens[i], nextFilteredTrie);
-      }
-
-      /*
-       * Now continue, trying to match the child node against the next position in the sentence. The
-       * third argument records that this match was not against a nonterminal.
-       */
-      filter(i + 1, nextFilteredTrie, false);
-    }
-
-    /*
-     * Now we attempt to match nonterminals. Any nonterminal is permitted to match any region of the
-     * sentence, up to the maximum span for that grammar. So we enumerate all children of the
-     * current (unfiltered) trie grammar node, looking for nonterminals (items whose label value is
-     * less than 0), then recurse.
-     * 
-     * There is one subtlety. Adjacent nonterminals in a grammar rule can match a span (i, j) in (j
-     * - i - 1) ways, but for purposes of determining whether a rule fits, this is all wasted
-     * effort. To handle this, we allow the first nonterminal in a sequence to record 1, 2, 3, ...
-     * terminals (up to the grammar's span limit, or the rest of the sentence, whichever is
-     * shorter). Subsequent adjacent nonterminals are permitted to consume only a single terminal.
-     */
-    HashMap<Integer, ? extends Trie> children = unfilteredTrieNode.getChildren();
-    if (children != null) {
-      for (int label : children.keySet()) {
-        if (label < 0) {
-          SentenceFilteredTrie nextFilteredTrie = trieNode.match(label);
-          if (nextFilteredTrie == null) {
-            nextFilteredTrie = new SentenceFilteredTrie(unfilteredTrieNode.match(label));
-            trieNode.children.put(label, nextFilteredTrie);
-          }
-
-          /*
-           * Recurse. If the last match was a nonterminal, we can only consume one more token.
-           * 
-           * TODO: This goes too far by looking at the whole sentence; each grammar has a maximum
-           * span limit which should be consulted. What we should be doing is passing the point
-           * where we started matching the current sentence, so we can apply this span limit, which
-           * is easily accessible (baseGrammar.spanLimit).
-           */
-          int maxJ = lastWasNT ? (i + 1) : tokens.length;
-          for (int j = i + 1; j <= maxJ; j++) {
-            filter(j, nextFilteredTrie, true);
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Alternate filter that uses regular expressions, walking the grammar trie and matching the
-   * source side of each rule collection against the input sentence. Failed matches are discarded,
-   * and trie nodes extending from that position need not be explored.
-   * 
-   * @param unfilteredTrie todo
-   * @return the root of the filtered trie if any rules were retained, otherwise null
-   */
-  @SuppressWarnings("unused")
-  private SentenceFilteredTrie filter_regexp(Trie unfilteredTrie) {
-    SentenceFilteredTrie trie = null;
-
-    /* Case 1: keep the trie node if it has a rule collection that matches the sentence */
-    if (unfilteredTrie.hasRules())
-      if (matchesSentence(unfilteredTrie))
-        trie = new SentenceFilteredTrie(unfilteredTrie);
-      else
-        return null;
-
-    /* Case 2: keep the trie node if it has children who have valid rule collections */
-    if (unfilteredTrie.hasExtensions())
-      for (Entry<Integer, ? extends Trie> arc : unfilteredTrie.getChildren().entrySet()) {
-        Trie unfilteredChildTrie = arc.getValue();
-        SentenceFilteredTrie nextTrie = filter_regexp(unfilteredChildTrie);
-        if (nextTrie != null) {
-          if (trie == null)
-            trie = new SentenceFilteredTrie(unfilteredTrie);
-          trie.children.put(arc.getKey(), nextTrie);
-        }
-      }
-
-    return trie;
-  }
-
-  private boolean matchesSentence(Trie childTrie) {
-    Rule rule = childTrie.getRuleCollection().getRules().get(0);
-    return rule.matches(sentence);
-  }
-
-  /**
-   * Implements a filtered trie, by sitting on top of a base trie and annotating nodes that match
-   * the given input sentence.
-   * 
-   * @author Matt Post post@cs.jhu.edu
-   * 
-   */
-  public class SentenceFilteredTrie implements Trie {
-
-    /* The underlying unfiltered trie node. */
-    private final Trie unfilteredTrieNode;
-
-    /* The child nodes in the filtered trie. */
-    private HashMap<Integer, SentenceFilteredTrie> children = null;
-
-    /**
-     * Constructor.
-     * 
-     * @param unfilteredTrieNode todo
-     */
-    public SentenceFilteredTrie(Trie unfilteredTrieNode) {
-      this.unfilteredTrieNode = unfilteredTrieNode;
-      this.children = new HashMap<>();
-    }
-
-    @Override
-    public SentenceFilteredTrie match(int wordID) {
-      if (children != null)
-        return children.get(wordID);
-      return null;
-    }
-
-    @Override
-    public boolean hasExtensions() {
-      return children != null;
-    }
-
-    @Override
-    public Collection<SentenceFilteredTrie> getExtensions() {
-      if (children != null)
-        return children.values();
-
-      return null;
-    }
-
-    @Override
-    public HashMap<Integer, SentenceFilteredTrie> getChildren() {
-      return children;
-    }
-
-    @Override
-    public boolean hasRules() {
-      // Chain to the underlying unfiltered node.
-      return unfilteredTrieNode.hasRules();
-    }
-
-    @Override
-    public RuleCollection getRuleCollection() {
-      // Chain to the underlying unfiltered node, since the rule collection just varies by target
-      // side.
-      return unfilteredTrieNode.getRuleCollection();
-    }
-
-    /**
-     * Counts the number of rules.
-     * 
-     * @return the number of rules rooted at this node.
-     */
-    public int getNumRules() {
-      int numRules = 0;
-      if (getTrieRoot() != null)
-        if (getTrieRoot().getRuleCollection() != null)
-          numRules += getTrieRoot().getRuleCollection().getRules().size();
-
-      for (SentenceFilteredTrie node : getExtensions())
-        numRules += node.getNumRules();
-
-      return numRules;
-    }
-
-    @Override
-    public Iterator<Integer> getTerminalExtensionIterator() {
-      return new ExtensionIterator(children, true);
-    }
-
-    @Override
-    public Iterator<Integer> getNonterminalExtensionIterator() {
-      return new ExtensionIterator(children, false);
-    }
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81baa658/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index cd2d3af..ebfa996 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -109,8 +109,9 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
       LOG.info("Couldn't create a GrammarReader for file {} with format {}",
           grammarFile, formatKeyword);
     }
-
-    this.printGrammar();
+    
+    LOG.info("MemoryBasedBatchGrammar: Read {} rules with {} distinct source sides from '{}'",
+        this.qtyRulesRead, this.qtyRuleBins, grammarFile);
   }
 
   protected GrammarReader<Rule> createReader(String format, String grammarFile) throws IOException {
@@ -199,11 +200,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
     pos.ruleBin.addRule(rule);
   }
 
-  protected void printGrammar() {
-    LOG.info("MemoryBasedBatchGrammar: Read {} rules with {} distinct source sides from '{}'",
-        this.qtyRulesRead, this.qtyRuleBins, grammarFile);
-  }
-
   /***
    * Takes an input word and creates an OOV rule in the current grammar for that word.
    * 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81baa658/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
index 4cbc7fa..11ba88f 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
@@ -113,7 +113,7 @@ public class Token {
       else
         annotations.put("lettercase",  "lower");
       
-      LOG.info("TOKEN: {} -> {} ({})", token, token.toLowerCase(), annotations.get("lettercase"));
+      LOG.debug("TOKEN: {} -> {} ({})", token, token.toLowerCase(), annotations.get("lettercase"));
       token = token.toLowerCase(); 
     }
     

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81baa658/src/test/resources/decoder/n-ary/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/n-ary/joshua.config b/src/test/resources/decoder/n-ary/joshua.config
index 1710c50..afc99bf 100644
--- a/src/test/resources/decoder/n-ary/joshua.config
+++ b/src/test/resources/decoder/n-ary/joshua.config
@@ -13,8 +13,6 @@ pop-limit = 100
 
 #nbest config
 use_unique_nbest = true
-use_tree_nbest = false
-add_combined_cost = true
 top_n = 1
 
 output-format = %c %s

[6/8] incubator-joshua git commit: bugfix in rule creation, cleaned up error messages

Posted by mj...@apache.org.

bugfix in rule creation, cleaned up error messages

Rules were being created in the old left-branching phrase-based style


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/3e851fa9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/3e851fa9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/3e851fa9

Branch: refs/heads/save_custom_grammars
Commit: 3e851fa9dde3f0fa161624ecbb48b8460b6c7743
Parents: ef07c53
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Nov 16 21:01:19 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 17 13:24:07 2016 -0500

----------------------------------------------------------------------
 .../org/apache/joshua/server/ServerThread.java  | 33 ++++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3e851fa9/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 638c194..6cb4bf1 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -190,6 +190,8 @@ public class ServerThread extends Thread implements HttpHandler {
     String[] tokens = meta.split("\\s+", 2);
     String type = tokens[0];
     String args = tokens.length > 1 ? tokens[1] : "";
+    
+    LOG.info("META: {}", type);
 
     switch (type) {
     case "get_weight":
@@ -213,18 +215,21 @@ public class ServerThread extends Thread implements HttpHandler {
 
       break;
     }
-    case "get_weights":
+    case "get_weights": {
       message.addMetaData("weights " + Decoder.weights.toString());
 
       break;
+    }
     case "add_rule": {
+      
+      try {
       String argTokens[] = args.split(" \\|\\|\\| ");
 
       if (argTokens.length < 3) {
-        LOG.error("* INVALID RULE '{}'", meta);
+        LOG.warn("* INVALID RULE '{}'", meta);
         return;
       }
-
+      
       String lhs = argTokens[0];
       String source = argTokens[1];
       String target = argTokens[2];
@@ -238,23 +243,20 @@ public class ServerThread extends Thread implements HttpHandler {
       /* Prepend source and target side nonterminals for phrase-based decoding. Probably better
        * handled in each grammar type's addRule() function.
        */
-      String ruleString = (joshuaConfiguration.search_algorithm.equals("stack")) ?
-          String
-              .format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr,
-                  alignmentStr) :
-          String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr,
-              alignmentStr);
-
+      String ruleString = String.format("%s ||| %s ||| %s ||| -1 %s %s", 
+          lhs, source, target, featureStr, alignmentStr);
       Rule rule = new HieroFormatReader().parseLine(ruleString);
       decoder.addCustomRule(rule);
 
       LOG.info("Added custom rule {}", rule.toString());
 
       break;
+      
+      } catch (Exception e) {
+        e.printStackTrace();
+      }
     }
-    case "list_rules":
-
-      LOG.info("list_rules");
+    case "list_rules": {
 
       // Walk the grammar trie
       ArrayList<Trie> nodes = new ArrayList<>();
@@ -278,6 +280,7 @@ public class ServerThread extends Thread implements HttpHandler {
       }
 
       break;
+    }
     case "remove_rule": {
 
       Rule rule = new HieroFormatReader().parseLine(args);
@@ -304,6 +307,10 @@ public class ServerThread extends Thread implements HttpHandler {
       }
       break;
     }
+    default: {
+      LOG.warn("INVALID metadata command '{}'", type);
+      break;
+    }
     }
   }
 }