You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/28 05:46:13 UTC
[4/4] incubator-joshua git commit: now defaults to NOT filtering grammar; reference optional for test
now defaults to NOT filtering grammar; reference optional for test
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/3e230474
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/3e230474
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/3e230474
Branch: refs/heads/master
Commit: 3e2304747aeb776b093311b4165a6e6dc475d740
Parents: bcc39db
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Apr 27 23:45:52 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Apr 27 23:45:52 2016 -0400
----------------------------------------------------------------------
scripts/training/pipeline.pl | 63 ++++++++++++++++++++++-----------------
1 file changed, 35 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3e230474/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index a438e60..373145f 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -66,7 +66,7 @@ my $MAXLEN_TEST = 0;
# when doing phrase-based decoding, the maximum length of a phrase (source side)
my $MAX_PHRASE_LEN = 5;
-my $DO_FILTER_TM = 1;
+my $DO_FILTER_TM = 0;
my $DO_SUBSAMPLE = 0;
my $DO_PACK_GRAMMARS = 1;
my $SCRIPTDIR = "$JOSHUA/scripts";
@@ -1739,14 +1739,17 @@ $cachepipe->cmd("test-bleu-${OPTIMIZER_RUN}",
"$testdir/bleu");
# Update the BLEU summary.
-compute_bleu_summary("test/*/bleu", "test/final-bleu");
+# Sometimes the target side for test doesn't exist (e.g., WMT)
+if (-e $TEST{target}) {
+ compute_bleu_summary("test/*/bleu", "test/final-bleu");
-if (defined $METEOR) {
- $cachepipe->cmd("test-meteor-${OPTIMIZER_RUN}",
- "$JOSHUA/bin/meteor $output $TEST{target} $TARGET > $testdir/meteor",
- $bestoutput,
- "$testdir/meteor");
- compute_meteor_summary("test/*/meteor", "test/final-meteor");
+ if (defined $METEOR) {
+ $cachepipe->cmd("test-meteor-${OPTIMIZER_RUN}",
+ "$JOSHUA/bin/meteor $output $TEST{target} $TARGET > $testdir/meteor",
+ $bestoutput,
+ "$testdir/meteor");
+ compute_meteor_summary("test/*/meteor", "test/final-meteor");
+ }
}
if ($DO_MBR) {
@@ -1759,12 +1762,14 @@ if ($DO_MBR) {
$nbestoutput,
$mbr_output);
- $cachepipe->cmd("test-bleu-mbr-${OPTIMIZER_RUN}",
- "$JOSHUA/bin/bleu output $TEST{target} $numrefs > $testdir/bleu.mbr",
- $mbr_output,
- "$testdir/bleu.mbr");
+ if (-e $TEST{target}) {
+ $cachepipe->cmd("test-bleu-mbr-${OPTIMIZER_RUN}",
+ "$JOSHUA/bin/bleu output $TEST{target} $numrefs > $testdir/bleu.mbr",
+ $mbr_output,
+ "$testdir/bleu.mbr");
- compute_bleu_summary("test/*/bleu.mbr", "test/final-bleu-mbr");
+ compute_bleu_summary("test/*/bleu.mbr", "test/final-bleu-mbr");
+ }
}
compute_time_summary("test/*/joshua.log", "test/final-times");
@@ -2093,25 +2098,27 @@ sub analyze_testrun {
my ($output,$source,$reference) = @_;
my $dir = dirname($output);
- mkdir("$dir/analysis") unless -d "$dir/analysis";
+ if (-e $reference) {
+ mkdir("$dir/analysis") unless -d "$dir/analysis";
- my @references;
- if (-e "$reference.0") {
- my $num = 0;
- while (-e "$reference.$num") {
- push(@references, "$reference.$num");
- $num++;
+ my @references;
+ if (-e "$reference.0") {
+ my $num = 0;
+ while (-e "$reference.$num") {
+ push(@references, "$reference.$num");
+ $num++;
+ }
+ } else {
+ push(@references, $reference);
}
- } else {
- push(@references, $reference);
- }
- my $references = join(" -r ", @references);
+ my $references = join(" -r ", @references);
- $cachepipe->cmd("analyze-test-${OPTIMIZER_RUN}",
- "$SCRIPTDIR/analysis/sentence-by-sentence.pl -s $source -r $references $output > $dir/analysis/sentence-by-sentence.html",
- $output,
- "$dir/analysis/sentence-by-sentence.html");
+ $cachepipe->cmd("analyze-test-${OPTIMIZER_RUN}",
+ "$SCRIPTDIR/analysis/sentence-by-sentence.pl -s $source -r $references $output > $dir/analysis/sentence-by-sentence.html",
+ $output,
+ "$dir/analysis/sentence-by-sentence.html");
+ }
}
sub compute_meteor_summary {