You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/10/11 21:00:10 UTC

[5/6] incubator-joshua git commit: pass in already-sorted grammar to the packer

pass in already-sorted grammar to the packer


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f0ed94c4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f0ed94c4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f0ed94c4

Branch: refs/heads/master
Commit: f0ed94c414eb2f18a708d0ff64e2e787c5978b73
Parents: f098431
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Oct 6 14:17:11 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Oct 6 14:17:11 2016 -0400

----------------------------------------------------------------------
 scripts/support/grammar-packer.pl | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f0ed94c4/scripts/support/grammar-packer.pl
----------------------------------------------------------------------
diff --git a/scripts/support/grammar-packer.pl b/scripts/support/grammar-packer.pl
index 47ce66f..fab46cd 100755
--- a/scripts/support/grammar-packer.pl
+++ b/scripts/support/grammar-packer.pl
@@ -30,8 +30,9 @@ my %opts = (
   m => '8g',      # amount of memory to give the packer
   T => '/tmp',    # location of temporary space
   v => 0,         # verbose
+  s => 0,         # grammars are already UNIX-sorted; skip the sorting step
 );
-getopts("am:T:vg:o:", \%opts) || die usage();
+getopts("am:T:vg:o:s", \%opts) || die usage();
 die usage() if (@ARGV);
 
 my $JOSHUA = $ENV{JOSHUA} or die "you must defined \$JOSHUA";
@@ -64,18 +65,25 @@ foreach my $grammar (@grammars) {
     exit 1;
   }
 
-  # Sort the grammar or phrase table
-  my $name = basename($grammar);
-  my (undef,$sorted_grammar) = tempfile("${name}XXXX", DIR => $opts{T}, UNLINK => 1);
-  print STDERR "Sorting grammar to $sorted_grammar...\n" if $opts{v};
+  if ($opts{s}) {
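+    # The user *swears* the grammars are already sorted! Trust her and pack the input file as given. NOTE(review): the input path itself goes into @sorted_grammars, which is unlinked after a successful pack -- confirm the user's file is not deleted.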
+    print STDERR "You claim the grammars are already sorted. Okay, I'll believe you!\n";
+    push(@sorted_grammars, $grammar);
 
-  # regular grammar
-  if (system("$CAT $grammar | sed 's/ ||| /\t/g' | LC_ALL=C sort -t'\t' -k2,2 -k3,3 --buffer-size=$opts{m} -T $opts{T} | sed 's/\t/ ||| /g' | gzip -9n > $sorted_grammar")) {
-    print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
-    exit 2;
-  }
+  } else {
+    # Sort the grammar or phrase table
+    my $name = basename($grammar);
+    my (undef,$sorted_grammar) = tempfile("${name}XXXX", DIR => $opts{T}, UNLINK => 1);
+    print STDERR "Sorting grammar to $sorted_grammar...\n" if $opts{v};
+
+    # regular grammar
+    if (system("$CAT $grammar | sed 's/ ||| /\t/g' | LC_ALL=C sort -t'\t' -k2,2 -k3,3 --buffer-size=$opts{m} -T $opts{T} | sed 's/\t/ ||| /g' | gzip -9n > $sorted_grammar")) {
+      print STDERR "* FATAL: Couldn't sort the grammar (not enough memory? short on tmp space?)\n";
+      exit 2;
+    }
 
-  push(@sorted_grammars, $sorted_grammar);
+    push(@sorted_grammars, $sorted_grammar);
+  }
 }
 
 
@@ -92,5 +100,7 @@ if ($retval == 0) {
   map { unlink($_) } @sorted_grammars;
 } else {
   print STDERR "* FATAL: Couldn't pack the grammar.\n";
+  print STDERR "* Copying sorted grammars ($grammars) to current directory.\n";
+  system("cp $grammars .");
   exit 1;
 }