You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/11/12 20:57:45 UTC
incubator-joshua git commit: updated thrax templates to extract count
Repository: incubator-joshua
Updated Branches:
refs/heads/master ced37eaf6 -> 885389d51
updated thrax templates to extract count
An earlier commit added filtering to the top 100 items for each
source-side rule, so with this count feature extracted, grammars are now
much smaller, and both the amortization time and the time to first
translation will be much lower.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/885389d5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/885389d5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/885389d5
Branch: refs/heads/master
Commit: 885389d513b5d0f3f68b59c3b17a776584b3a208
Parents: ced37ea
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sat Nov 12 15:56:51 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sat Nov 12 15:56:51 2016 -0500
----------------------------------------------------------------------
scripts/training/templates/thrax-hiero.conf | 2 +-
scripts/training/templates/thrax-phrasal.conf | 2 +-
scripts/training/templates/thrax-phrase-gt.conf | 2 +-
scripts/training/templates/thrax-phrase.conf | 2 +-
scripts/training/templates/thrax-samt.conf | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-hiero.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-hiero.conf b/scripts/training/templates/thrax-hiero.conf
index b38098a..b3036cb 100644
--- a/scripts/training/templates/thrax-hiero.conf
+++ b/scripts/training/templates/thrax-hiero.conf
@@ -73,7 +73,7 @@ phrase-penalty 2.718
# in this example, the features are phrase translation probability,
# lexical probability, and phrase penalty
# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
# the only option and default later we will want to add formats for other decoders such as moses and
# cdec, if they use other formats
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrasal.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrasal.conf b/scripts/training/templates/thrax-phrasal.conf
index 380b230..953d9c3 100644
--- a/scripts/training/templates/thrax-phrasal.conf
+++ b/scripts/training/templates/thrax-phrasal.conf
@@ -74,7 +74,7 @@ phrase-penalty 2.718
# lexical probability, and phrase penalty
# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty
+features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty count
# the only option and default
# later we will want to add formats for other decoders
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrase-gt.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrase-gt.conf b/scripts/training/templates/thrax-phrase-gt.conf
index 0bfd815..5d8f33f 100644
--- a/scripts/training/templates/thrax-phrase-gt.conf
+++ b/scripts/training/templates/thrax-phrase-gt.conf
@@ -69,7 +69,7 @@ phrase-penalty 2.718
# in this example, the features are phrase translation probability,
# lexical probability, and phrase penalty
# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features e_given_f_phrase_gt_smoothed f_given_e_phrase_gt_smoothed e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features e_given_f_phrase_gt_smoothed f_given_e_phrase_gt_smoothed e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
# the only option and default later we will want to add formats for other decoders such as moses and
# cdec, if they use other formats
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-phrase.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-phrase.conf b/scripts/training/templates/thrax-phrase.conf
index fbf113a..997bee8 100644
--- a/scripts/training/templates/thrax-phrase.conf
+++ b/scripts/training/templates/thrax-phrase.conf
@@ -69,7 +69,7 @@ phrase-penalty 2.718
# in this example, the features are phrase translation probability,
# lexical probability, and phrase penalty
# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
-features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment
+features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty alignment count
# the only option and default later we will want to add formats for other decoders such as moses and
# cdec, if they use other formats
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/885389d5/scripts/training/templates/thrax-samt.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/thrax-samt.conf b/scripts/training/templates/thrax-samt.conf
index 48aa2b1..e438a76 100644
--- a/scripts/training/templates/thrax-samt.conf
+++ b/scripts/training/templates/thrax-samt.conf
@@ -75,7 +75,7 @@ phrase-penalty 2.718
# lexical probability, and phrase penalty
# features phrase-penalty e2fphrase f2ephrase lexprob lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic glue-rule rarity target-word-count unaligned-count
#features e_given_f_phrase f_given_e_phrase e_given_f_lex f_given_e_lex rarity phrase-penalty
-features e2fphrase f2ephrase lexprob phrase-penalty rarity lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic target-word-count unaligned-count
+features e2fphrase f2ephrase lexprob phrase-penalty rarity lexical abstract adjacent x-rule source-terminals-without-target target-terminals-without-source monotonic target-word-count unaligned-count count
# the only option and default later we will want to add formats for other decoders such as moses and
# cdec, if they use other formats