You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/24 19:45:48 UTC

[27/41] incubator-joshua git commit: Moved test file locations from resources/ to src/test/resources

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/lm_oov/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/lm_oov/joshua.config b/src/test/resources/lm_oov/joshua.config
new file mode 100644
index 0000000..9cbd603
--- /dev/null
+++ b/src/test/resources/lm_oov/joshua.config
@@ -0,0 +1,17 @@
+feature-function = LanguageModel -lm_type berkeleylm -lm_order 5 -lm_file src/test/resources/berkeley_lm/lm -oov_feature
+
+tm = thrax -owner pt -maxspan 12 -path src/test/resources/kbest_extraction/grammar
+tm = thrax -owner glue -maxspan -1 -path src/test/resources/kbest_extraction/glue-grammar
+
+top-n = 0
+
+#feature_function = WordPenalty
+feature_function = OOVPenalty
+
+# Model Weights ####
+
+lm_0 0
+lm_0_oov 1
+OOVPenalty 1
+tm_pt_0 0
+tm_glue 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/config
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/config b/src/test/resources/phrase_decoder/config
new file mode 100644
index 0000000..11e0108
--- /dev/null
+++ b/src/test/resources/phrase_decoder/config
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path src/test/resources/phrase_decoder/rules.1.gz -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file src/test/resources/phrase_decoder/lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = true
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/constrained.config
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/constrained.config b/src/test/resources/phrase_decoder/constrained.config
new file mode 100644
index 0000000..ffa988c
--- /dev/null
+++ b/src/test/resources/phrase_decoder/constrained.config
@@ -0,0 +1,28 @@
+tm = moses pt 0 src/test/resources/phrase_decoder/rules.1.gz
+
+lm = kenlm 5 true false 100 src/test/resources/phrase_decoder/lm.1.gz
+
+mark-oovs = false
+pop-limit = 10
+top-n = 5
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = true
+reordering-limit = 10
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/constrained.output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/constrained.output.gold b/src/test/resources/phrase_decoder/constrained.output.gold
new file mode 100644
index 0000000..238387c
--- /dev/null
+++ b/src/test/resources/phrase_decoder/constrained.output.gold
@@ -0,0 +1,5 @@
+0 ||| President Obama |8-8| to |7-7| hinder |4-4| a strategy |0-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-15.792 tm_pt_1=-17.550 tm_pt_2=-14.599 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=7.000 ||| -15.163
+0 ||| President Obama |8-8| to |7-7| hinder |4-4| a |0-0| strategy |1-1| for |3-3| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.919 tm_pt_1=-17.550 tm_pt_2=-14.917 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=8.000 ||| -15.505
+0 ||| President Obama |8-8| to hinder |3-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-14.986 tm_pt_1=-17.951 tm_pt_2=-14.075 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=6.000 ||| -15.762
+0 ||| President Obama |8-8| to hinder |3-4| a |0-0| strategy |1-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.112 tm_pt_1=-17.951 tm_pt_2=-14.393 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.103
+0 ||| President Obama |8-8| to |3-3| hinder |4-4| a strategy |0-1| for |7-7| Republican |2-2| re @-@ election |5-6| ||| tm_pt_0=-16.329 tm_pt_1=-17.951 tm_pt_2=-15.136 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.257

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/lm.1.gz b/src/test/resources/phrase_decoder/lm.1.gz
new file mode 100644
index 0000000..3f4c453
Binary files /dev/null and b/src/test/resources/phrase_decoder/lm.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/output.gold b/src/test/resources/phrase_decoder/output.gold
new file mode 100644
index 0000000..509a3de
--- /dev/null
+++ b/src/test/resources/phrase_decoder/output.gold
@@ -0,0 +1 @@
+0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/phrase_decoder/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/phrase_decoder/rules.1.gz b/src/test/resources/phrase_decoder/rules.1.gz
new file mode 100644
index 0000000..14466e9
Binary files /dev/null and b/src/test/resources/phrase_decoder/rules.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar b/src/test/resources/wa_grammar
new file mode 100644
index 0000000..82d0052
--- /dev/null
+++ b/src/test/resources/wa_grammar
@@ -0,0 +1,3 @@
+[X] ||| A [X,1] B1 [X,2] B2 C ||| a b [X,2] c1 [X,1] c2 ||| 1 1 1 1 1 1 OOV=1 ||| 0-0 2-1 4-1 5-3 5-5
+[X] ||| U Z1 Z2 ||| n1 u z ||| 1 1 1 1 1 1 OOV=2 ||| 0-1 1-2 2-2
+[X] ||| K ||| k1 k2 k3 n1 n2 n3 ||| 1 1 1 1 1 1 OOV=4 ||| 0-0 0-1 0-2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/config
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/config b/src/test/resources/wa_grammar.packed/config
new file mode 100644
index 0000000..fbc07d0
--- /dev/null
+++ b/src/test/resources/wa_grammar.packed/config
@@ -0,0 +1,2 @@
+max-source-len = 6
+version = 3

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/encoding
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/encoding b/src/test/resources/wa_grammar.packed/encoding
new file mode 100644
index 0000000..630f69f
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/encoding differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/slice_00000.alignments
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/slice_00000.alignments b/src/test/resources/wa_grammar.packed/slice_00000.alignments
new file mode 100644
index 0000000..f1425eb
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/slice_00000.alignments differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/slice_00000.features b/src/test/resources/wa_grammar.packed/slice_00000.features
new file mode 100644
index 0000000..5a4c774
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/slice_00000.features differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/slice_00000.source b/src/test/resources/wa_grammar.packed/slice_00000.source
new file mode 100644
index 0000000..4607b89
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/slice_00000.source differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/slice_00000.target b/src/test/resources/wa_grammar.packed/slice_00000.target
new file mode 100644
index 0000000..fe11a38
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/slice_00000.target differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/slice_00000.target.lookup b/src/test/resources/wa_grammar.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..7d82179
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/20e6bf4b/src/test/resources/wa_grammar.packed/vocabulary
----------------------------------------------------------------------
diff --git a/src/test/resources/wa_grammar.packed/vocabulary b/src/test/resources/wa_grammar.packed/vocabulary
new file mode 100644
index 0000000..637651e
Binary files /dev/null and b/src/test/resources/wa_grammar.packed/vocabulary differ