You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/17 07:02:19 UTC

[33/51] [partial] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/k-best-extraction/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/k-best-extraction/test.sh b/src/test/resources/decoder/k-best-extraction/test.sh
new file mode 100755
index 0000000..2f12d33
--- /dev/null
+++ b/src/test/resources/decoder/k-best-extraction/test.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+# Decode input.txt: decoder stdout goes to "output", its stderr to "log".
+# $JOSHUA is quoted so an install path containing spaces still resolves.
+"$JOSHUA/bin/joshua-decoder" -m 1g -threads 2 -c joshua.config < input.txt > output 2> log
+
+# Extract the translations and model scores: "|"-fields 1, 4, 10 = "|||"-columns 1, 2, 4.
+awk -F'|' '{print $1 "|||" $4 "|||" $10}' output > output.scores
+
+# Pass only if the extracted columns match the gold file; keep artifacts on failure.
+if diff -u output.scores output.scores.gold > diff; then
+  rm -f diff output log output.scores
+  exit 0
+else
+  exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/left-state/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/left-state/glue-grammar b/src/test/resources/decoder/left-state/glue-grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/src/test/resources/decoder/left-state/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/left-state/grammar.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/left-state/grammar.gz b/src/test/resources/decoder/left-state/grammar.gz
new file mode 100644
index 0000000..47e8b1e
Binary files /dev/null and b/src/test/resources/decoder/left-state/grammar.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/left-state/input.bn
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/left-state/input.bn b/src/test/resources/decoder/left-state/input.bn
new file mode 100644
index 0000000..0f11e7a
--- /dev/null
+++ b/src/test/resources/decoder/left-state/input.bn
@@ -0,0 +1,2 @@
+রবীন্দ্রনাথের জন্ম হয়েছিল কলকাতার এক পিরালী ব্রাহ্মণ পরিবারে ।
+সাম্প্রতিককালে ভারতের সঙ্গে যুক্তরাষ্ট্রের সম্পর্কের উন্নতি হয়েছে ।

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/left-state/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/left-state/joshua.config b/src/test/resources/decoder/left-state/joshua.config
new file mode 100644
index 0000000..dae08c0
--- /dev/null
+++ b/src/test/resources/decoder/left-state/joshua.config
@@ -0,0 +1,44 @@
+lm = kenlm 5 true false 100 lm.gz
+
+tm = thrax pt 12 grammar.gz
+tm = thrax glue -1 glue-grammar
+
+mark_oovs=false
+
+#tm config
+default_non_terminal=X
+goalSymbol=GOAL
+
+#pruning config
+pop-limit=100
+
+#nbest config
+use_unique_nbest=true
+top-n = 300
+
+feature_function = WordPenalty
+feature_function = OOVPenalty
+
+# Model Weights ####
+
+lm_0 1.2373676802179452
+tm_pt_0 -2.4497429277910214
+tm_pt_1 0.7224581556224123
+tm_pt_2 -0.31689069155153504
+tm_pt_3 0.33861043967238036
+tm_pt_4 0.03553113401320236
+tm_pt_5 0.19138972284064748
+tm_pt_6 0.3417994095521415
+tm_pt_7 -0.9936312455671283
+tm_pt_8 0.9070737587091975
+tm_pt_9 0.8202511858619419
+tm_pt_10 0.2593091306160006
+tm_pt_11 0.25597137004462134
+tm_pt_12 0.3538894647790496
+tm_pt_13 -0.36212061186692646
+tm_pt_14 -0.32923261148678096
+tm_pt_15 0.5524863522177359
+tm_pt_16 0.23451595442127693
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ae47ca15/src/test/resources/decoder/left-state/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/left-state/lm.gz b/src/test/resources/decoder/left-state/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/left-state/lm.gz differ