You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/11/12 20:57:45 UTC

incubator-joshua git commit: updated thrax templates to extract count

Repository: incubator-joshua
Updated Branches:
  refs/heads/master ced37eaf6 -> 885389d51


updated thrax templates to extract count

An earlier commit filtered to the top 100 items for each source-side
rule, so with this count feature, grammars are now a lot smaller, and the
amortization time and time-to-first-translation will be much lower.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/885389d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/885389d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/885389d5

Branch: refs/heads/master
Commit: 885389d513b5d0f3f68b59c3b17a776584b3a208
Parents: ced37ea
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sat Nov 12 15:56:51 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sat Nov 12 15:56:51 2016 -0500

----------------------------------------------------------------------
 scripts/training/templates/thrax-hiero.conf     | 2 +-
 scripts/training/templates/thrax-phrasal.conf   | 2 +-
 scripts/training/templates/thrax-phrase-gt.conf | 2 +-
 scripts/training/templates/thrax-phrase.conf    | 2 +-
 scripts/training/templates/thrax-samt.conf      | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-hiero.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-hiero.conf b/scripts/training/templates/thrax-hiero.conf
index b38098a..b3036cb 100644
--- a/scripts/training/templates/thrax-hiero.conf
+++ b/scripts/training/templates/thrax-hiero.conf
@@ -73,7 +73,7 @@ phrase-penalty  2.718
 # in this example, the features are phrase translation probability,
 # lexical probability, and phrase penalty
 # features        phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
 
 # the only option and default later we will want to add formats for other decoders such as moses and
 # cdec, if they use other formats

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrasal.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrasal.conf b/scripts/training/templates/thrax-phrasal.conf
index 380b230..953d9c3 100644
--- a/scripts/training/templates/thrax-phrasal.conf
+++ b/scripts/training/templates/thrax-phrasal.conf
@@ -74,7 +74,7 @@ phrase-penalty  2.718
 # lexical probability, and phrase penalty
 # features        phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
 
-features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty
+features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty count
 
 # the only option and default
 # later we will want to a dd formats for other decoders

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrase-gt.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrase-gt.conf b/scripts/training/templates/thrax-phrase-gt.conf
index 0bfd815..5d8f33f 100644
--- a/scripts/training/templates/thrax-phrase-gt.conf
+++ b/scripts/training/templates/thrax-phrase-gt.conf
@@ -69,7 +69,7 @@ phrase-penalty  2.718
 # in this example, the features are phrase translation probability,
 # lexical probability, and phrase penalty
 # features        phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features        e_given_f_phrase_gt_smoothed f_given_e_phrase_gt_smoothed e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features        e_given_f_phrase_gt_smoothed f_given_e_phrase_gt_smoothed e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
 
 # the only option and default later we will want to add formats for other decoders such as moses and
 # cdec, if they use other formats

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrase.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrase.conf b/scripts/training/templates/thrax-phrase.conf
index fbf113a..997bee8 100644
--- a/scripts/training/templates/thrax-phrase.conf
+++ b/scripts/training/templates/thrax-phrase.conf
@@ -69,7 +69,7 @@ phrase-penalty  2.718
 # in this example, the features are phrase translation probability,
 # lexical probability, and phrase penalty
 # features        phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
 
 # the only option and default later we will want to add formats for other decoders such as moses and
 # cdec, if they use other formats

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-samt.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-samt.conf b/scripts/training/templates/thrax-samt.conf
index 48aa2b1..e438a76 100644
--- a/scripts/training/templates/thrax-samt.conf
+++ b/scripts/training/templates/thrax-samt.conf
@@ -75,7 +75,7 @@ phrase-penalty  2.718
 # lexical probability, and phrase penalty
 # features        phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
 #features        e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty
-features        e2fphrase f2ephrase lexprob phrase-penalty rarity lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic target-word-count unaligned-count
+features        e2fphrase f2ephrase lexprob phrase-penalty rarity lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic target-word-count unaligned-count count
 
 # the only option and default later we will want to add formats for other decoders such as moses and
 # cdec, if they use other formats