You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/18 13:57:25 UTC

[1/2] incubator-joshua git commit: comments

Repository: incubator-joshua
Updated Branches:
  refs/heads/master f02bd279e -> ad84b9682


comments


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2ef15515
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2ef15515
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2ef15515

Branch: refs/heads/master
Commit: 2ef15515c23e6bed3530f32f731e639b476f8681
Parents: f02bd27
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 18 09:55:42 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 18 09:55:42 2016 -0400

----------------------------------------------------------------------
 scripts/support/split2files              | 4 ++--
 scripts/training/trim_parallel_corpus.pl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2ef15515/scripts/support/split2files
----------------------------------------------------------------------
diff --git a/scripts/support/split2files b/scripts/support/split2files
index 866ab0e..76bbdba 100755
--- a/scripts/support/split2files
+++ b/scripts/support/split2files
@@ -3,7 +3,7 @@
 # Reads any number of file names from the command line, then split()s
 # STDIN on tabs and writes them to those files. Example usage:
 #
-# paste file1 file2 file3 ... | splittabs file1.new file2.new file3.new.gz ...
+# paste file1 file2 file3 ... | split2files file1.new file2.new file3.new.gz ...
 #
 # If there are more fields on STDIN than files on the command-line, the extra
 # fields are silently discarded.
@@ -17,7 +17,7 @@ my @fh;
 $| = 1;   # don't buffer output
 
 if (@ARGV < 0) {
-  print "Usage: cat tabbed-file | splittabs file1 [file2 [file3 ...]]\n";
+  print "Usage: cat tabbed-file | split2files file1 [file2 [file3 ...]]\n";
   exit;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2ef15515/scripts/training/trim_parallel_corpus.pl
----------------------------------------------------------------------
diff --git a/scripts/training/trim_parallel_corpus.pl b/scripts/training/trim_parallel_corpus.pl
index 39b635c..00cf5bf 100755
--- a/scripts/training/trim_parallel_corpus.pl
+++ b/scripts/training/trim_parallel_corpus.pl
@@ -5,7 +5,7 @@
 # the first two fields has more than N tokens, the line is skipped.
 
 # e.g.,
-# paste corpus.en corpus.fr | trim_parallel_corpus.pl 40 | splittabs en.trimmed.40 fr.trimmed.40
+# paste corpus.en corpus.fr | trim_parallel_corpus.pl 40 | split2files en.trimmed.40 fr.trimmed.40
 
 my $thresh = shift || 100;
 


[2/2] incubator-joshua git commit: include alignments by default in packing

Posted by mj...@apache.org.
include alignments by default in packing


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ad84b968
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ad84b968
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ad84b968

Branch: refs/heads/master
Commit: ad84b96828327e6c3e89a3a2c930a30c2284a0a1
Parents: 2ef1551
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 18 09:57:13 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 18 09:57:13 2016 -0400

----------------------------------------------------------------------
 scripts/support/grammar-packer.pl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ad84b968/scripts/support/grammar-packer.pl
----------------------------------------------------------------------
diff --git a/scripts/support/grammar-packer.pl b/scripts/support/grammar-packer.pl
index d2b1627..170973d 100755
--- a/scripts/support/grammar-packer.pl
+++ b/scripts/support/grammar-packer.pl
@@ -20,20 +20,21 @@ use File::Temp qw/tempfile/;
 use File::Basename qw/basename/;
 
 my %opts = (
+  a => 1,         # whether alignments are included in the grammar(s)
   g => '',        # comma-separated list of grammars to pack
   o => '',        # comma-separated list of grammar output directories
   m => '8g',      # amount of memory to give the packer
   T => '/tmp',    # location of temporary space
   v => 0,         # verbose
 );
-getopts("m:T:vg:o:", \%opts) || die usage();
+getopts("am:T:vg:o:", \%opts) || die usage();
 die usage() if (@ARGV);
 
 my $JOSHUA = $ENV{JOSHUA} or die "you must defined \$JOSHUA";
 my $CAT    = "$JOSHUA/scripts/training/scat";
 
 sub usage {
-  print "Usage: grammar-packer.pl [-m MEM] [-T /path/to/tmp] -g 'grammar [grammar2 ...]' -o 'grammar.packed [grammar2.packed ...]'\n";
+  print "Usage: grammar-packer.pl [-a] [-m MEM] [-T /path/to/tmp] -g 'grammar [grammar2 ...]' -o 'grammar.packed [grammar2.packed ...]'\n";
   exit 1;
 }
 
@@ -88,7 +89,8 @@ foreach my $grammar (@grammars) {
 # Do the packing using the config.
 my $grammars = join(" ", @sorted_grammars);
 my $outputs  = join(" ", @outputs);
-my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.tools.GrammarPackerCli -g $grammars --outputs $outputs";
+my $alignments = $opts{a} ? "--ga" : "";
+my $cmd = "java -Xmx$opts{m} -cp $JOSHUA/lib/args4j-2.0.29.jar:$JOSHUA/class joshua.tools.GrammarPackerCli -g $grammars --outputs $outputs $alignments";
 print STDERR "Packing with $cmd...\n" if $opts{v};
 
 my $retval = system($cmd);