You are viewing a plain text version of this content. The canonical link (the target of "here" on the original page) is not included in this plain text version.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/11/03 18:51:20 UTC

[1/2] incubator-joshua git commit: added example sentences and translations to the LP

Repository: incubator-joshua
Updated Branches:
  refs/heads/master 8b530bb07 -> f23cd0917


added example sentences and translations to the LP (language pack)


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2967b53d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2967b53d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2967b53d

Branch: refs/heads/master
Commit: 2967b53dbccf703f797ea77a4250643b44d16e0b
Parents: 8b530bb
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Nov 1 16:29:26 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Nov 1 16:29:26 2016 -0400

----------------------------------------------------------------------
 scripts/language-pack/README.template |  9 ++++++---
 scripts/language-pack/build_lp.sh     | 18 +++++++++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2967b53d/scripts/language-pack/README.template
----------------------------------------------------------------------
diff --git a/scripts/language-pack/README.template b/scripts/language-pack/README.template
index 03041ca..36a0740 100644
--- a/scripts/language-pack/README.template
+++ b/scripts/language-pack/README.template
@@ -16,6 +16,9 @@ For information on the data used to construct this language pack,
 please see the CREDITS file, and to see its performance on a range of
 different publicly available test sets, see BENCHMARKS.
 
+A small number of example sentences are provided in 'example.<SRC>', along
+with a human reference translation for each in 'example.<TRG>'.
+
 This language pack was released on <DATE>.
 
 Quick Start
@@ -27,9 +30,9 @@ To run the language pack, invoke the command
 The Joshua decoder will start running, accepting input from STDIN and writing to
 STDOUT. Joshua expects its input in the form of a single sentence per line. Each
 sentence should first be piped through `prepare.sh`, which normalizes and
-tokenizes the input for the language pack's source language.
+tokenizes the input for the language pack's source language. 
 
-    cat sentences.txt | prepare.sh | joshua > output.txt
+    cat example.<SRC> | prepare.sh | joshua > output.<TRG>
 
 It takes some time (sometimes as much as a minute) to load all of the models
 into memory, which means there is high latency from startup until the first
@@ -42,7 +45,7 @@ option
 
 You can then connect directly to the socket using nc or telnet:
 
-    cat sentences.txt | prepare.sh | nc localhost 5674 > output.txt
+    cat example.<SRC> | prepare.sh | nc localhost 5674 > output.<TRG>
 
 You can set the RESTful interface by also passing '-server-type http':
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2967b53d/scripts/language-pack/build_lp.sh
----------------------------------------------------------------------
diff --git a/scripts/language-pack/build_lp.sh b/scripts/language-pack/build_lp.sh
index 68cd086..1426768 100755
--- a/scripts/language-pack/build_lp.sh
+++ b/scripts/language-pack/build_lp.sh
@@ -8,17 +8,19 @@ config=$2
 mem=$3
 credits=$4
 benchmark=$5
+example=$6
 
 date=$(date +%Y-%m-%d)
 
-if [[ -z $5 ]]; then
-    echo "Usage: $0 langpair config mem credits-file benchmark-file"
+if [[ -z $6 ]]; then
+    echo "Usage: $0 langpair config mem credits-file benchmark-file example"
     echo "where"
     echo "  langpair is the language pair, (e.g., es-en)"
     echo "  config is the tuned Joshua config, (1/tune/joshua.config.final)"
     echo "  mem is the amount of memory the decoder needs"
     echo "  credits-file is a file describing how the model was built (1/CREDITS"
     echo "  benchmark-file is a file describing model performance on test sets (1/BENCHMARK)"
+    echo "  example is a path prefix to a pair of small (~10 lines) example files"
     exit 1
 fi
 
@@ -58,10 +60,19 @@ copy_template() {
     > $2
 }
 
-# Copy over critical infrastructure files
+# Create the target directory
 [[ ! -d "$dest/target" ]] && mkdir -p "$dest/target"
+
+# Copy over critical infrastructure files
 cp $JOSHUA/target/joshua-*-jar-with-dependencies.jar $dest/target
 
+# Copy over the example files
+for ext in $source_abbr $target_abbr; do
+    [[ ! -s $example.$ext ]] && echo "Can't find example file $example.$ext, quitting" && exit
+    cp $example.$ext $dest/example.$ext
+    chmod 444 $dest/example.$ext
+done
+
 # Copy over the web demonstration
 cp -a $JOSHUA/demo $dest/web
 
@@ -81,3 +92,4 @@ chmod 444 $dest/BENCHMARK
 # Create the README
 copy_template "$JOSHUA/scripts/language-pack/README.template" "$dest/README"
 chmod 444 $dest/README
+


[2/2] incubator-joshua git commit: shutup!

Posted by mj...@apache.org.
shutup! (suppresses tokenize.pl's "No known abbreviations" warnings on STDERR when $QUIET is set)


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f23cd091
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f23cd091
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f23cd091

Branch: refs/heads/master
Commit: f23cd0917699cadd52ffaac7e12b45071e21bf8e
Parents: 2967b53
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 3 14:50:40 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 3 14:50:40 2016 -0400

----------------------------------------------------------------------
 scripts/preparation/tokenize.pl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f23cd091/scripts/preparation/tokenize.pl
----------------------------------------------------------------------
diff --git a/scripts/preparation/tokenize.pl b/scripts/preparation/tokenize.pl
index 0ae49cd..a5cae8d 100755
--- a/scripts/preparation/tokenize.pl
+++ b/scripts/preparation/tokenize.pl
@@ -49,7 +49,7 @@ if (!$QUIET) {
 
 load_prefixes($language,\%NONBREAKING_PREFIX);
 
-if (scalar(%NONBREAKING_PREFIX) eq 0){
+if (scalar(%NONBREAKING_PREFIX) == 0 && ! $QUIET){
 	print STDERR "Warning: No known abbreviations for language '$language'\n";
 }
 
@@ -262,7 +262,7 @@ sub load_prefixes {
 	#default back to English if we don't have a language-specific prefix file
 	if (!(-e $prefixfile)) {
 		$prefixfile = "$PREFIX_DIR/nonbreaking_prefix.en";
-		print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n";
+		print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n" unless $QUIET;
 		die ("ERROR: No abbreviations files found in $PREFIX_DIR\n") unless (-e $prefixfile);
 	}