You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/11/03 18:51:20 UTC
[1/2] incubator-joshua git commit: added example sentences and
translations to the LP
Repository: incubator-joshua
Updated Branches:
refs/heads/master 8b530bb07 -> f23cd0917
added example sentences and translations to the LP
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2967b53d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2967b53d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2967b53d
Branch: refs/heads/master
Commit: 2967b53dbccf703f797ea77a4250643b44d16e0b
Parents: 8b530bb
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Nov 1 16:29:26 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Nov 1 16:29:26 2016 -0400
----------------------------------------------------------------------
scripts/language-pack/README.template | 9 ++++++---
scripts/language-pack/build_lp.sh | 18 +++++++++++++++---
2 files changed, 21 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2967b53d/scripts/language-pack/README.template
----------------------------------------------------------------------
diff --git a/scripts/language-pack/README.template b/scripts/language-pack/README.template
index 03041ca..36a0740 100644
--- a/scripts/language-pack/README.template
+++ b/scripts/language-pack/README.template
@@ -16,6 +16,9 @@ For information on the data used to construct this language pack,
please see the CREDITS file, and to see its performance on a range of
different publicly available test sets, see BENCHMARKS.
+A small number of example sentences are provided in 'example.<SRC>', along
+with a human reference translation for each in 'example.<TRG>'.
+
This language pack was released on <DATE>.
Quick Start
@@ -27,9 +30,9 @@ To run the language pack, invoke the command
The Joshua decoder will start running, accepting input from STDIN and writing to
STDOUT. Joshua expects its input in the form of a single sentence per line. Each
sentence should first be piped through `prepare.sh`, which normalizes and
-tokenizes the input for the language pack's source language.
+tokenizes the input for the language pack's source language.
- cat sentences.txt | prepare.sh | joshua > output.txt
+ cat example.<SRC> | prepare.sh | joshua > output.<TRG>
It takes some time (sometimes as much as a minute) to load all of the models
into memory, which means there is high latency from startup until the first
@@ -42,7 +45,7 @@ option
You can then connect directly to the socket using nc or telnet:
- cat sentences.txt | prepare.sh | nc localhost 5674 > output.txt
+ cat example.<SRC> | prepare.sh | nc localhost 5674 > output.<TRG>
You can enable the RESTful interface by also passing '-server-type http':
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2967b53d/scripts/language-pack/build_lp.sh
----------------------------------------------------------------------
diff --git a/scripts/language-pack/build_lp.sh b/scripts/language-pack/build_lp.sh
index 68cd086..1426768 100755
--- a/scripts/language-pack/build_lp.sh
+++ b/scripts/language-pack/build_lp.sh
@@ -8,17 +8,19 @@ config=$2
mem=$3
credits=$4
benchmark=$5
+example=$6
date=$(date +%Y-%m-%d)
-if [[ -z $5 ]]; then
- echo "Usage: $0 langpair config mem credits-file benchmark-file"
+if [[ -z $6 ]]; then
+ echo "Usage: $0 langpair config mem credits-file benchmark-file example"
echo "where"
echo " langpair is the language pair, (e.g., es-en)"
echo " config is the tuned Joshua config, (1/tune/joshua.config.final)"
echo " mem is the amount of memory the decoder needs"
echo " credits-file is a file describing how the model was built (1/CREDITS)"
echo " benchmark-file is a file describing model performance on test sets (1/BENCHMARK)"
+ echo " example is a path prefix to a pair of small (~10 lines) example files"
exit 1
fi
@@ -58,10 +60,19 @@ copy_template() {
> $2
}
-# Copy over critical infrastructure files
+# Create the target directory
[[ ! -d "$dest/target" ]] && mkdir -p "$dest/target"
+
+# Copy over critical infrastructure files
cp $JOSHUA/target/joshua-*-jar-with-dependencies.jar $dest/target
+# Copy over the example files
+for ext in $source_abbr $target_abbr; do
+ [[ ! -s $example.$ext ]] && echo "Can't find example file $example.$ext, quitting" && exit
+ cp $example.$ext $dest/example.$ext
+ chmod 444 $dest/example.$ext
+done
+
# Copy over the web demonstration
cp -a $JOSHUA/demo $dest/web
@@ -81,3 +92,4 @@ chmod 444 $dest/BENCHMARK
# Create the README
copy_template "$JOSHUA/scripts/language-pack/README.template" "$dest/README"
chmod 444 $dest/README
+
[2/2] incubator-joshua git commit: shutup!
Posted by mj...@apache.org.
shutup!
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/f23cd091
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/f23cd091
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/f23cd091
Branch: refs/heads/master
Commit: f23cd0917699cadd52ffaac7e12b45071e21bf8e
Parents: 2967b53
Author: Matt Post <po...@cs.jhu.edu>
Authored: Thu Nov 3 14:50:40 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Thu Nov 3 14:50:40 2016 -0400
----------------------------------------------------------------------
scripts/preparation/tokenize.pl | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/f23cd091/scripts/preparation/tokenize.pl
----------------------------------------------------------------------
diff --git a/scripts/preparation/tokenize.pl b/scripts/preparation/tokenize.pl
index 0ae49cd..a5cae8d 100755
--- a/scripts/preparation/tokenize.pl
+++ b/scripts/preparation/tokenize.pl
@@ -49,7 +49,7 @@ if (!$QUIET) {
load_prefixes($language,\%NONBREAKING_PREFIX);
-if (scalar(%NONBREAKING_PREFIX) eq 0){
+if (scalar(%NONBREAKING_PREFIX) == 0 && ! $QUIET){
print STDERR "Warning: No known abbreviations for language '$language'\n";
}
@@ -262,7 +262,7 @@ sub load_prefixes {
#default back to English if we don't have a language-specific prefix file
if (!(-e $prefixfile)) {
$prefixfile = "$PREFIX_DIR/nonbreaking_prefix.en";
- print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n";
+ print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n" unless $QUIET;
die ("ERROR: No abbreviations files found in $PREFIX_DIR\n") unless (-e $prefixfile);
}