You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/24 22:53:28 UTC

[07/18] incubator-joshua git commit: added training script

added training script


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dc6b4112
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dc6b4112
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dc6b4112

Branch: refs/heads/morph
Commit: dc6b41129d21dd647dbf20919c4093b4495f80fb
Parents: 155249f
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Apr 22 19:09:39 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Apr 22 19:09:39 2016 -0400

----------------------------------------------------------------------
 scripts/morph/train-mallet.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc6b4112/scripts/morph/train-mallet.sh
----------------------------------------------------------------------
diff --git a/scripts/morph/train-mallet.sh b/scripts/morph/train-mallet.sh
new file mode 100644
index 0000000..cfc7802
--- /dev/null
+++ b/scripts/morph/train-mallet.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Usage: train-mallet.sh DATA_FILE MODEL_FILE
+# Trains a mallet model on source-annotated data of the form
+#
+# source_word target_word feat:val feat:val feat:val
+# Requires $JOSHUA to be set; it is used to build the Java classpath below.
+if [[ -z "$2" ]]; then
+  echo "Usage: train-mallet.sh DATA_FILE MODEL_FILE" >&2
+  echo "This will read data from DATA_FILE and serialize the models to MODEL_FILE" >&2
+  exit 1  # a bare `exit` would return echo's status (0) and hide the misuse
+fi
+: "${JOSHUA:?JOSHUA must be set (used to locate lib/ and class/)}"
+java -mx16g -cp "$JOSHUA/lib/mallet-2.0.7.jar:$JOSHUA/lib/trove4j-2.0.2.jar:$JOSHUA/class" joshua.decoder.ff.LexicalSharpener "$1" "$2"