You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/18 13:57:25 UTC
[1/2] incubator-joshua git commit: comments
Repository: incubator-joshua
Updated Branches:
refs/heads/master f02bd279e -> ad84b9682
comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2ef15515
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2ef15515
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2ef15515
Branch: refs/heads/master
Commit: 2ef15515c23e6bed3530f32f731e639b476f8681
Parents: f02bd27
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 18 09:55:42 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 18 09:55:42 2016 -0400
----------------------------------------------------------------------
scripts/support/split2files | 4 ++--
scripts/training/trim_parallel_corpus.pl | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2ef15515/scripts/support/split2files
----------------------------------------------------------------------
diff --git a/scripts/support/split2files b/scripts/support/split2files
index 866ab0e..76bbdba 100755
--- a/scripts/support/split2files
+++ b/scripts/support/split2files
@@ -3,7 +3,7 @@
# Reads any number of file names from the command line, then split()s
# STDIN on tabs and writes them to those files. Example usage:
#
-# paste file1 file2 file3 ... | splittabs file1.new file2.new file3.new.gz ...
+# paste file1 file2 file3 ... | split2files file1.new file2.new file3.new.gz ...
#
# If there are more fields on STDIN than files on the command-line, the extra
# fields are silently discarded.
@@ -17,7 +17,7 @@ my @fh;
$| = 1; # don't buffer output
if (@ARGV < 0) {
- print "Usage: cat tabbed-file | splittabs file1 [file2 [file3 ...]]\n";
+ print "Usage: cat tabbed-file | split2files file1 [file2 [file3 ...]]\n";
exit;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2ef15515/scripts/training/trim_parallel_corpus.pl
----------------------------------------------------------------------
diff --git a/scripts/training/trim_parallel_corpus.pl b/scripts/training/trim_parallel_corpus.pl
index 39b635c..00cf5bf 100755
--- a/scripts/training/trim_parallel_corpus.pl
+++ b/scripts/training/trim_parallel_corpus.pl
@@ -5,7 +5,7 @@
# the first two fields has more than N tokens, the line is skipped.
# e.g.,
-# paste corpus.en corpus.fr | trim_parallel_corpus.pl 40 | splittabs en.trimmed.40 fr.trimmed.40
+# paste corpus.en corpus.fr | trim_parallel_corpus.pl 40 | split2files en.trimmed.40 fr.trimmed.40
my $thresh = shift || 100;
[2/2] incubator-joshua git commit: include alignments by default in packing
Posted by mj...@apache.org.
include alignments by default in packing
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ad84b968
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ad84b968
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ad84b968
Branch: refs/heads/master
Commit: ad84b96828327e6c3e89a3a2c930a30c2284a0a1
Parents: 2ef1551
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 18 09:57:13 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 18 09:57:13 2016 -0400
----------------------------------------------------------------------
scripts/support/grammar-packer.pl | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ad84b968/scripts/support/grammar-packer.pl
----------------------------------------------------------------------
diff --git a/scripts/support/grammar-packer.pl b/scripts/support/grammar-packer.pl
index d2b1627..170973d 100755
--- a/scripts/support/grammar-packer.pl
+++ b/scripts/support/grammar-packer.pl
@@ -20,20 +20,21 @@ use File::Temp qw/tempfile/;
use File::Basename qw/basename/;
my %opts = (
+ a => 1, # whether alignments are included in the grammar(s)
g => '', # comma-separated list of grammars to pack
o => '', # comma-separated list of grammar output directories
m => '8g', # amount of memory to give the packer
T => '/tmp', # location of temporary space
v => 0, # verbose
);
-getopts("m:T:vg:o:", \%opts) || die usage();
+getopts("am:T:vg:o:", \%opts) || die usage();
die usage() if (@ARGV);
my $JOSHUA = $ENV{JOSHUA} or die "you must defined \$JOSHUA";
my $CAT = "$JOSHUA/scripts/training/scat";
sub usage {
- print "Usage: grammar-packer.pl [-m MEM] [-T /path/to/tmp] -g 'grammar [grammar2 ...]' -o 'grammar.packed [grammar2.packed ...]'\n";
+ print "Usage: grammar-packer.pl [-a] [-m MEM] [-T /path/to/tmp] -g 'grammar [grammar2 ...]' -o 'grammar.packed [grammar2.packed ...]'\n";
exit 1;
}
@@ -88,7 +89,8 @@ foreach my $grammar (@grammars) {
# Do the packing using the config.
my $grammars = join(" ", @sorted_grammars);
my $outputs = join(" ", @outputs);
-my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.tools.GrammarPackerCli -g $grammars --outputs $outputs";
+my $alignments = $opts{a} ? "--ga" : "";
+my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.tools.GrammarPackerCli -g $grammars --outputs $outputs $alignments";
print STDERR "Packing with $cmd...\n" if $opts{v};
my $retval = system($cmd);