You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:11 UTC

[16/94] [abbrv] [partial] incubator-joshua git commit: Pulled JOSHUA-252 changes and Resolved Merge Conflicts

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/num_translation_options/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/lm.gz b/src/test/resources/decoder/num_translation_options/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/src/test/resources/decoder/num_translation_options/lm.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/num_translation_options/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/output.gold b/src/test/resources/decoder/num_translation_options/output.gold
new file mode 100644
index 0000000..4203822
--- /dev/null
+++ b/src/test/resources/decoder/num_translation_options/output.gold
@@ -0,0 +1,12 @@
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000
+-19.196 ||| i like taco bell ||| tm_pt_0=4.000 tm_glue_0=1.000 lm_0=-17.449 WordPenalty=-2.606 OOVPenalty=0.000
+-19.733 ||| i love taco bell ||| tm_pt_0=5.000 tm_glue_0=1.000 lm_0=-18.690 WordPenalty=-2.606 OOVPenalty=0.000
+-22.883 ||| i appreciate taco bell ||| tm_pt_0=3.000 tm_glue_0=1.000 lm_0=-19.620 WordPenalty=-2.606 OOVPenalty=0.000
+-424.954 ||| yo quiero taco bell ||| tm_pt_0=0.000 tm_glue_0=4.000 lm_0=-21.293 WordPenalty=-2.606 OOVPenalty=-400.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/num_translation_options/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/num_translation_options/test.sh b/src/test/resources/decoder/num_translation_options/test.sh
new file mode 100755
index 0000000..e413526
--- /dev/null
+++ b/src/test/resources/decoder/num_translation_options/test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -u
+
+cat input | $JOSHUA/bin/joshua-decoder -c joshua.config > output 2> log
+cat input | $JOSHUA/bin/joshua-decoder -c joshua.config -no-dot-chart >> output 2>> log
+cat input | $JOSHUA/bin/joshua-decoder -c joshua.config.packed >> output 2>> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+  rm -f diff log output output.scores
+  exit 0
+else
+  exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/config b/src/test/resources/decoder/oov-list/config
new file mode 100644
index 0000000..048b517
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/config
@@ -0,0 +1,29 @@
+lm = kenlm 5 false false 100 ../n-ary/lm.gz
+
+tm = thrax phrase 20 grammar
+tm = thrax glue -1 glue-grammar
+
+mark_oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+#nbest config
+use_unique_nbest = true
+use_tree_nbest = false
+top_n = 1
+
+oov-list = CD 0.0488752 JJ 0.186114 NN 0.291795 NNS 0.0894967 NP 0.117171 OOV 0.033015 VB 0.0313967 VBG 0.0404596 VBN 0.0317203 
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_phrase_0 1
+tm_glue_0 1
+WordPenalty -3.6942747832593694
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/glue-grammar b/src/test/resources/decoder/oov-list/glue-grammar
new file mode 100644
index 0000000..f988151
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [S,2] ||| [GOAL,1] [S,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/grammar b/src/test/resources/decoder/oov-list/grammar
new file mode 100644
index 0000000..f12d78b
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/grammar
@@ -0,0 +1,11 @@
+[NP-S] ||| GOATS ||| Goats ||| 0
+[VP] ||| EAT ||| eat ||| 0
+[NP-O] ||| CHEESE ||| cheese ||| 0
+[VP] ||| [VB,1] ||| [VB,1] ||| 0
+[S] ||| [NP-O,1] [NP-S,2] [VP,3] ||| [NP-S,2] [VP,3] [NP-O,1] ||| 0
+[S] ||| [NP,1] [VP,2] [NP,3] ||| [NP,1] [VP,2] [NP,3] ||| 0
+[A] ||| 1 ||| i ||| 0
+[B] ||| 2 ||| will ||| 0
+[C] ||| 3 ||| go ||| 0
+[D] ||| 4 ||| home ||| 0
+[S] ||| [C,1] [A,2] [D,3] [B,4] ||| [A,2] [B,4] [C,1] [D,3] ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/input.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/input.txt b/src/test/resources/decoder/oov-list/input.txt
new file mode 100644
index 0000000..23fd45a
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/input.txt
@@ -0,0 +1,3 @@
+CHEESE GOATS EAT
+3 1 4 2
+goets eet cheez

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/output.gold b/src/test/resources/decoder/oov-list/output.gold
new file mode 100644
index 0000000..d911c52
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/output.gold
@@ -0,0 +1,3 @@
+0 ||| Goats eat cheese ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-16.587 WordPenalty=-2.171 OOVPenalty=0.000 ||| -11.503
+1 ||| i will go home ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-12.155 WordPenalty=-2.606 OOVPenalty=0.000 ||| -4.414
+2 ||| goets_OOV eet_OOV cheez_OOV ||| tm_phrase_0=0.000 tm_glue_0=1.000 lm_0=-17.700 WordPenalty=-2.171 OOVPenalty=-7.749 ||| -20.629

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/oov-list/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/oov-list/test.sh b/src/test/resources/decoder/oov-list/test.sh
new file mode 100755
index 0000000..38c1718
--- /dev/null
+++ b/src/test/resources/decoder/oov-list/test.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c config > output 2> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+	rm -f diff log output output.scores
+	exit 0
+else
+	exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/constrained/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/config b/src/test/resources/decoder/phrase/constrained/config
new file mode 100644
index 0000000..be45e0a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/config
@@ -0,0 +1,29 @@
+tm = moses pt 0 ../decode/rules.1.gz
+
+lm = kenlm 5 true false 100 ../decode/lm.1.gz
+
+mark-oovs = false
+pop-limit = 10
+top-n = 5
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 10
+use-unique-nbest = false
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/constrained/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/corpus.es b/src/test/resources/decoder/phrase/constrained/corpus.es
new file mode 100644
index 0000000..a063f9a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/corpus.es
@@ -0,0 +1 @@
+una estrategia republicana para obstaculizar la reelección de Obama ||| President Obama to hinder a strategy for Republican re @-@ election

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/constrained/glue.grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/glue.grammar b/src/test/resources/decoder/phrase/constrained/glue.grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/glue.grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/constrained/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/output.gold b/src/test/resources/decoder/phrase/constrained/output.gold
new file mode 100644
index 0000000..a784043
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/output.gold
@@ -0,0 +1,5 @@
+0 ||| President Obama to hinder a strategy for Republican re @-@ election ||| tm_pt_0=-15.792 tm_pt_1=-17.550 tm_pt_2=-14.599 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=7.000 ||| -15.163
+0 ||| President Obama to hinder a strategy for Republican re @-@ election ||| tm_pt_0=-16.919 tm_pt_1=-17.550 tm_pt_2=-14.917 tm_pt_3=-18.298 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-24.000 PhrasePenalty=8.000 ||| -15.505
+0 ||| President Obama to hinder a strategy for Republican re @-@ election ||| tm_pt_0=-14.986 tm_pt_1=-17.951 tm_pt_2=-14.075 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=6.000 ||| -15.762
+0 ||| President Obama to hinder a strategy for Republican re @-@ election ||| tm_pt_0=-16.112 tm_pt_1=-17.951 tm_pt_2=-14.393 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.103
+0 ||| President Obama to hinder a strategy for Republican re @-@ election ||| tm_pt_0=-16.329 tm_pt_1=-17.951 tm_pt_2=-15.136 tm_pt_3=-18.699 lm_0=-29.452 OOVPenalty=0.000 WordPenalty=-4.777 Distortion=-32.000 PhrasePenalty=7.000 ||| -16.257

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/constrained/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/constrained/test.sh b/src/test/resources/decoder/phrase/constrained/test.sh
new file mode 100755
index 0000000..7703aa4
--- /dev/null
+++ b/src/test/resources/decoder/phrase/constrained/test.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+  rm -f diff output log
+  exit 0
+else
+  exit 1
+fi
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/config b/src/test/resources/decoder/phrase/decode/config
new file mode 100644
index 0000000..9987b1a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/config
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path rules.packed -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/config.packed
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/config.packed b/src/test/resources/decoder/phrase/decode/config.packed
new file mode 100644
index 0000000..9987b1a
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/config.packed
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path rules.packed -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = false
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/corpus.es b/src/test/resources/decoder/phrase/decode/corpus.es
new file mode 100644
index 0000000..6e255f9
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/corpus.es
@@ -0,0 +1 @@
+una estrategia republicana para obstaculizar la reelección de Obama 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/lm.1.gz b/src/test/resources/decoder/phrase/decode/lm.1.gz
new file mode 100644
index 0000000..3f4c453
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/lm.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/output.gold b/src/test/resources/decoder/phrase/decode/output.gold
new file mode 100644
index 0000000..0083345
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/output.gold
@@ -0,0 +1 @@
+0 ||| a strategy republican to hinder reelection Obama ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.1.gz b/src/test/resources/decoder/phrase/decode/rules.1.gz
new file mode 100644
index 0000000..14466e9
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/config b/src/test/resources/decoder/phrase/decode/rules.packed/config
new file mode 100644
index 0000000..287da2d
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/rules.packed/config
@@ -0,0 +1 @@
+max-source-len = 5

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/encoding
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/encoding b/src/test/resources/decoder/phrase/decode/rules.packed/encoding
new file mode 100644
index 0000000..57e7b75
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/encoding differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features
new file mode 100644
index 0000000..b67c809
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.features differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source
new file mode 100644
index 0000000..8679998
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.source differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target
new file mode 100644
index 0000000..07aefbe
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup
new file mode 100644
index 0000000..3e8c294
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/slice_00000.target.lookup differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary
new file mode 100644
index 0000000..8b5e4d5
Binary files /dev/null and b/src/test/resources/decoder/phrase/decode/rules.packed/vocabulary differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/test-packed.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/test-packed.sh b/src/test/resources/decoder/phrase/decode/test-packed.sh
new file mode 100755
index 0000000..a65c031
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/test-packed.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config.packed > output 2> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+  rm -f diff output log
+  exit 0
+else
+  exit 1
+fi
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/decode/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/decode/test.sh b/src/test/resources/decoder/phrase/decode/test.sh
new file mode 100755
index 0000000..4732f73
--- /dev/null
+++ b/src/test/resources/decoder/phrase/decode/test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -u
+
+cat corpus.es | $JOSHUA/bin/joshua-decoder -threads 1 -c config > output 2> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+  rm -f diff output log
+  exit 0
+else
+  exit 1
+fi
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/README b/src/test/resources/decoder/phrase/include-align-index/README
new file mode 100644
index 0000000..d0c0813
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/README
@@ -0,0 +1,2 @@
+Added non-functioning example that will test outputting phrase alignments if
+that ability is ever restored.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/config b/src/test/resources/decoder/phrase/include-align-index/config
new file mode 100644
index 0000000..f30014d
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/config
@@ -0,0 +1,29 @@
+tm = moses -owner pt -maxspan 0 -path rules.1.gz -max-source-len 5
+feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz
+
+search = stack
+
+mark-oovs = false
+pop-limit = 10
+top-n = 1
+
+output-format = %i ||| %s ||| %f ||| %c
+
+include-align-index = true
+reordering-limit = 6
+
+# And these are the feature functions to activate.
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = Distortion
+feature-function = PhrasePenalty -owner pt
+
+OOVPenalty 1.0
+Distortion 0.114849
+WordPenalty -0.201544
+PhrasePenalty -0.236965
+tm_pt_0 0.0370068
+tm_pt_1 0.0495759
+tm_pt_2 0.196742
+tm_pt_3 0.0745423
+lm_0 0.204412452147565

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/corpus.es b/src/test/resources/decoder/phrase/include-align-index/corpus.es
new file mode 100644
index 0000000..6e255f9
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/corpus.es
@@ -0,0 +1 @@
+una estrategia republicana para obstaculizar la reelección de Obama 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/lm.1.gz b/src/test/resources/decoder/phrase/include-align-index/lm.1.gz
new file mode 100644
index 0000000..3f4c453
Binary files /dev/null and b/src/test/resources/decoder/phrase/include-align-index/lm.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/log
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/log b/src/test/resources/decoder/phrase/include-align-index/log
new file mode 100644
index 0000000..05cd80f
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/log
@@ -0,0 +1,50 @@
+Parameters read from configuration file:
+    tm = 'moses -owner pt -maxspan 0 -path rules.1.gz -max-source-len 5'
+    featurefunction = 'StateMinimizingLanguageModel -lm_order 5 -lm_file lm.1.gz'
+    search = 'stack'
+    markoovs = 'false'
+    poplimit = '10'
+    topn = '1'
+    outputformat = '%i ||| %s ||| %f ||| %c'
+    includealignindex = 'true'
+    reorderinglimit = '6'
+    featurefunction = 'OOVPenalty'
+    featurefunction = 'WordPenalty'
+    featurefunction = 'Distortion'
+    featurefunction = 'PhrasePenalty -owner pt'
+Parameters overridden from the command line:
+    threads = '1'
+    c = 'config'
+Read 9 weights (0 of them dense)
+Reading grammar from file rules.1.gz...
+........10........20........30........40........50........60........70........80........90.....100%
+MemoryBasedBatchGrammar: Read 165161 rules with 18 distinct source sides from 'rules.1.gz'
+Couldn't create a GrammarReader for file null with format phrase
+MemoryBasedBatchGrammar: Read 0 rules with 0 distinct source sides from 'null'
+Memory used 219.6 MB
+Grammar loading took: 0 seconds.
+Stateful object with state index 0
+Loading the LM will be faster if you build a binary file.
+Reading lm.1.gz
+----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100
+****************************************************************************************************
+FEATURE: tm_pt (weight 0.000)
+FEATURE: tm_custom (weight 0.000)
+FEATURE: lm_0, order 5 (weight 0.204)
+FEATURE: OOVPenalty (weight 1.000)
+FEATURE: WordPenalty (weight -0.202)
+FEATURE: Distortion (weight 0.115)
+FEATURE: PhrasePenalty (weight -0.237)
+Grammar sorting happening lazily on-demand.
+Model loading took 0 seconds
+Memory used 219.6 MB
+Input 0: <s> una estrategia republicana para obstaculizar la reelección de Obama </s>
+Input 0: Collecting options took 0.000 seconds
+Input 0: Search took 0.013 seconds
+Input 0: Translation took 1.532 seconds
+Input 0: Memory used is 392.5 MB
+Translation 0: -7.496 a strategy republican to hinder reelection Obama 
+Input 0: 1-best extraction took 0.026 seconds
+Decoding completed.
+Memory used 401.6 MB
+Total running time: 2 seconds

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/output
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/output b/src/test/resources/decoder/phrase/include-align-index/output
new file mode 100644
index 0000000..509a3de
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/output
@@ -0,0 +1 @@
+0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/output.gold b/src/test/resources/decoder/phrase/include-align-index/output.gold
new file mode 100644
index 0000000..509a3de
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/output.gold
@@ -0,0 +1 @@
+0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/rules.1.gz b/src/test/resources/decoder/phrase/include-align-index/rules.1.gz
new file mode 100644
index 0000000..14466e9
Binary files /dev/null and b/src/test/resources/decoder/phrase/include-align-index/rules.1.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/include-align-index/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/include-align-index/test.sh b/src/test/resources/decoder/phrase/include-align-index/test.sh
new file mode 100644
index 0000000..4732f73
--- /dev/null
+++ b/src/test/resources/decoder/phrase/include-align-index/test.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -u
+
+# Decode corpus.es with a single thread. Stdout goes to output, stderr to log.
+cat corpus.es | "$JOSHUA/bin/joshua-decoder" -threads 1 -c config > output 2> log
+
+# Pass only if output matches output.gold. On a mismatch the diff, output
+# and log files are left in place and the script exits 1.
+if diff -u output output.gold > diff; then
+  rm -f diff output log
+  exit 0
+else
+  exit 1
+fi
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/README b/src/test/resources/decoder/phrase/unique-hypotheses/README
new file mode 100644
index 0000000..753f57e
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/README
@@ -0,0 +1 @@
+Ensures that derivations are unique for the phrase-based decoder.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es b/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
new file mode 120000
index 0000000..11373db
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/corpus.es
@@ -0,0 +1 @@
+../decode/corpus.es
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config b/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
new file mode 100644
index 0000000..c35b267
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/joshua.config
@@ -0,0 +1,23 @@
+tm = moses pt 0 rules.1.gz
+default-non-terminal = X
+goal-symbol = GOAL
+lm = kenlm 5 true false 100 lm.1.gz
+mark-oovs = false
+pop-limit = 100
+top-n = 300
+use-unique-nbest = true
+output-format = %s
+include-align-index = false
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature_function = Distortion
+feature_function = PhrasePenalty
+lm_0 1.0
+tm_pt_1 1.0
+tm_pt_3 1.0
+tm_pt_0 1.0
+tm_pt_2 1.0
+WordPenalty -2.844814
+OOVPenalty 1.0
+PhrasePenalty 1.0
+Distortion 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
new file mode 120000
index 0000000..3655f03
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/lm.1.gz
@@ -0,0 +1 @@
+../decode/lm.1.gz
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/output.gold b/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
new file mode 100644
index 0000000..0e5fb98
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/output.gold
@@ -0,0 +1,300 @@
+a strategy republican for hinder the re @-@ election of Obama
+a strategy republican for hinder the reelection of Obama
+a strategy republican for obstruct the re @-@ election of Obama
+a strategy republican for obstruct the reelection of Obama
+a strategy republican for hamper the re @-@ election of Obama
+a strategy republican for hamper the reelection of Obama
+a strategy republican to obstruct the re @-@ election of Obama
+a strategy republican for hinder reelection of Obama
+a strategy republican to obstruct the reelection of Obama
+a strategy republican to hinder the re @-@ election of Obama
+a strategy republican to hinder the reelection of Obama
+a strategy republican for obstruct reelection of Obama
+a strategy republican for hinder the reelection Obama
+a strategy republican for hamper reelection of Obama
+a strategy republican for hindering the re @-@ election of Obama
+a strategy republican for obstruct the reelection Obama
+a strategy republican for hindering the reelection of Obama
+an strategy republican for hinder the re @-@ election of Obama
+a strategy republican for hamper the reelection Obama
+an strategy republican for hinder the reelection of Obama
+a strategy republican for obstructing the re @-@ election of Obama
+a strategy republican to hinder reelection of Obama
+a strategy republican hinder for the re @-@ election of Obama
+a strategy republican for obstructing the reelection of Obama
+an strategy republican for obstruct the re @-@ election of Obama
+a strategy republican for the hinder reelection of Obama
+a strategy republican hinder for the reelection of Obama
+a strategy republican for hinder reelection the of Obama
+an strategy republican for obstruct the reelection of Obama
+a strategy republican for hinder reelection Obama
+a strategy republican to obstruct the reelection Obama
+strategy a republican for hinder the re @-@ election of Obama
+a strategy republican obstruct for the re @-@ election of Obama
+a strategy republican for hinder the reelection Obama of
+a strategy republican to hinder the reelection Obama
+a strategy republican for hinder reelection of the Obama
+strategy a republican for hinder the reelection of Obama
+a strategy republican obstruct for the reelection of Obama
+an strategy republican for hamper the re @-@ election of Obama
+a strategy republican for obstruct reelection the of Obama
+a strategy republican for hinder the of reelection Obama
+a strategy republican to obstruct reelection of Obama
+a strategy republican for obstruct reelection Obama
+an strategy republican for hamper the reelection of Obama
+strategy a republican for obstruct the re @-@ election of Obama
+a strategy republican for obstruct the reelection Obama of
+a strategy republican hamper for the re @-@ election of Obama
+a strategy republican for obstruct reelection of the Obama
+strategy a republican for obstruct the reelection of Obama
+a strategy republican for obstruct the of reelection Obama
+a strategy republican for hinder the of Obama reelection
+a strategy republican for hindering reelection of Obama
+a strategy republican hamper for the reelection of Obama
+a strategy republican for hamper reelection the of Obama
+an strategy republican to obstruct the re @-@ election of Obama
+a strategy republican for hamper reelection Obama
+a strategy republican for hinder of the reelection Obama
+an strategy republican for hinder reelection of Obama
+strategy a republican for hamper the re @-@ election of Obama
+an strategy republican to obstruct the reelection of Obama
+an strategy republican to hinder the re @-@ election of Obama
+a strategy republican for hamper the reelection Obama of
+a strategy republican for hindering the reelection Obama
+a strategy republican for hinder the re @-@ election Obama
+a strategy republican for obstruct the of Obama reelection
+a strategy republican for hamper reelection of the Obama
+strategy a republican for hamper the reelection of Obama
+an strategy republican to hinder the reelection of Obama
+one strategy republican for hinder the re @-@ election of Obama
+a strategy republican for obstructing reelection of Obama
+a strategy republican for hamper the of reelection Obama
+a strategy republican for hinder the reelection from Obama
+a strategy republican for hinder reelection the Obama
+one strategy republican for hinder the reelection of Obama
+a strategy republican for obstruct of the reelection Obama
+a strategy republican for hinder the Obama reelection of
+an strategy republican for obstruct reelection of Obama
+a strategy republican for hinder reelection of Obama the
+a strategy republican for the reelection hinder of Obama
+a strategy republican for obstruct the re @-@ election Obama
+an strategy republican for hinder the reelection Obama
+a strategy republican to hinder reelection the of Obama
+a strategy republican for hinder &apos;s reelection Obama
+strategy a republican to obstruct the re @-@ election of Obama
+one strategy republican for obstruct the re @-@ election of Obama
+a strategy republican for obstruct the reelection from Obama
+a strategy republican for hamper the of Obama reelection
+a strategy republican for hinder reelection Obama of
+a strategy republican to hinder reelection Obama
+strategy a republican for hinder reelection of Obama
+a strategy republican to obstruct the reelection Obama of
+a strategy republican for obstruct reelection the Obama
+strategy a republican to obstruct the reelection of Obama
+one strategy republican for obstruct the reelection of Obama
+strategy a republican to hinder the re @-@ election of Obama
+a strategy republican for obstructing the reelection Obama
+a strategy republican for obstruct the Obama reelection of
+a strategy republican for obstruct reelection of Obama the
+a strategy republican to obstruct the of reelection Obama
+a strategy republican to hinder the reelection Obama of
+a strategy republican for the hinder reelection Obama
+a strategy republican hinder for the reelection Obama
+a strategy republican for hamper of the reelection Obama
+a strategy republican to hinder reelection of the Obama
+an strategy republican for hamper reelection of Obama
+a strategy republican for the reelection obstruct of Obama
+strategy a republican to hinder the reelection of Obama
+an strategy republican for hindering the re @-@ election of Obama
+an strategy republican for obstruct the reelection Obama
+a strategy republican for hinder the reelection Obama &apos;s
+a strategy republican to hinder the of reelection Obama
+a strategy republican for obstruct &apos;s reelection Obama
+a strategy republican for hinder the reelection for Obama
+a strategy republican for hamper the re @-@ election Obama
+a strategy republican for obstruct reelection Obama of
+an strategy republican for hindering the reelection of Obama
+strategy a republican for obstruct reelection of Obama
+one strategy republican for hamper the re @-@ election of Obama
+a strategy republican for hamper the reelection from Obama
+a strategy republican for hinder the Obama of reelection
+a strategy republican for hinder the Obama reelection
+a strategy republican for the re @-@ election of Obama hinder
+a strategy republican to obstruct reelection the of Obama
+a strategy republican for hinder of Obama the reelection
+a strategy republican to obstruct the of Obama reelection
+strategy a republican for hinder the reelection Obama
+a strategy republican for hamper reelection the Obama
+a strategy republican obstruct for the reelection Obama
+one strategy republican for hamper the reelection of Obama
+a strategy republican for hamper the Obama reelection of
+a strategy republican for hamper reelection of Obama the
+a strategy republican for the reelection of Obama hinder
+a strategy republican to obstruct reelection Obama
+a strategy republican for obstruct the reelection Obama &apos;s
+a strategy republican to hinder the of Obama reelection
+a strategy republican for obstruct the reelection for Obama
+an strategy republican for hamper the reelection Obama
+a strategy republican for hinder reelection Obama of the
+a strategy republican for hamper &apos;s reelection Obama
+a strategy republican to obstruct reelection of the Obama
+a strategy republican for obstruct the Obama of reelection
+a strategy republican for hamper reelection Obama of
+a strategy republican for obstruct the Obama reelection
+a strategy republican for hinder &apos;s re @-@ election Obama
+a strategy republican for the re @-@ election of Obama obstruct
+an strategy republican for obstructing the re @-@ election of Obama
+strategy a republican for hamper reelection of Obama
+a strategy republican to hinder of the reelection Obama
+a strategy republican for obstruct of Obama the reelection
+a strategy republican to obstruct the re @-@ election Obama
+strategy a republican for hindering the re @-@ election of Obama
+strategy a republican for obstruct the reelection Obama
+an strategy republican to hinder reelection of Obama
+a strategy republican for hindering reelection the of Obama
+a strategy republican for hinder reelection from Obama
+an strategy republican hinder for the re @-@ election of Obama
+strategy an republican for hinder the re @-@ election of Obama
+one strategy republican to obstruct the re @-@ election of Obama
+a strategy republican for the reelection of Obama obstruct
+a strategy republican to obstruct the reelection from Obama
+an strategy republican for obstructing the reelection of Obama
+a strategy republican for hindering the reelection Obama of
+a strategy republican for hinder the re @-@ election Obama of
+a strategy republican for hindering reelection Obama
+a strategy republican hamper for the reelection Obama
+strategy a republican for hindering the reelection of Obama
+a strategy republican to hinder the re @-@ election Obama
+one strategy republican for hinder reelection of Obama
+an strategy republican for the hinder reelection of Obama
+an strategy republican hinder for the reelection of Obama
+strategy an republican for hinder the reelection of Obama
+a strategy republican for hamper the reelection Obama &apos;s
+a strategy republican for hindering the of reelection Obama
+one strategy republican to obstruct the reelection of Obama
+one strategy republican to hinder the re @-@ election of Obama
+an strategy republican for hinder reelection the of Obama
+a strategy republican to hinder the reelection from Obama
+a strategy republican for hinder re @-@ election of the Obama
+a strategy republican to obstruct the Obama reelection of
+a strategy republican for obstruct reelection Obama of the
+a strategy republican for hamper the reelection for Obama
+a strategy republican for reelection hinder of Obama
+a strategy republican for hinder reelection the Obama of
+a strategy republican for hindering reelection of the Obama
+a strategy republican for obstruct &apos;s re @-@ election Obama
+an strategy republican for hinder reelection Obama
+a strategy republican to hinder reelection the Obama
+a strategy republican hinder the re @-@ election of Obama for
+one strategy republican to hinder the reelection of Obama
+a strategy republican for hinder of reelection the Obama
+a strategy republican to hinder the Obama reelection of
+an strategy republican to obstruct the reelection Obama
+a strategy republican to hinder reelection of Obama the
+a strategy republican for hamper the Obama of reelection
+a strategy republican for hinder the of re @-@ election Obama
+a strategy republican for hamper the Obama reelection
+a strategy republican for obstruct reelection from Obama
+an strategy republican obstruct for the re @-@ election of Obama
+a strategy republican for the re @-@ election of Obama hamper
+strategy an republican for obstruct the re @-@ election of Obama
+a strategy republican for obstructing reelection the of Obama
+a strategy republican for hinder of Obama reelection the
+a strategy republican for reelection of Obama hinder the
+a strategy republican for hamper of Obama the reelection
+a strategy republican hinder the reelection of Obama for
+strategy a republican for hamper the reelection Obama
+a strategy republican for obstruct the re @-@ election Obama of
+an strategy republican for hinder the reelection Obama of
+an strategy republican to hinder the reelection Obama
+an strategy republican for hinder reelection of the Obama
+one strategy republican for obstruct reelection of Obama
+a strategy republican for obstructing reelection Obama
+an strategy republican obstruct for the reelection of Obama
+a strategy republican for the reelection of Obama hamper
+strategy an republican for obstruct the reelection of Obama
+a strategy republican to hinder &apos;s reelection Obama
+a strategy republican to obstruct of the reelection Obama
+a strategy republican for hindering the of Obama reelection
+an strategy republican for obstruct reelection the of Obama
+a strategy republican for obstruct re @-@ election of the Obama
+an strategy republican for hinder the of reelection Obama
+strategy a republican for obstructing the re @-@ election of Obama
+an strategy republican to obstruct reelection of Obama
+a strategy republican to hinder reelection Obama of
+a strategy republican for reelection obstruct of Obama
+strategy a republican to hinder reelection of Obama
+one strategy republican for hinder the reelection Obama
+a strategy republican for obstruct reelection the Obama of
+a strategy republican to the reelection hinder of Obama
+strategy a republican hinder for the re @-@ election of Obama
+an strategy republican for obstruct reelection Obama
+a strategy republican for obstructing the reelection Obama of
+a strategy republican for hinder reelection Obama the
+a strategy republican for hinder reelection Obama &apos;s
+a strategy republican obstruct the re @-@ election of Obama for
+a strategy republican for hamper reelection Obama of the
+a strategy republican for reelection of hinder the Obama
+a strategy republican for obstructing reelection of the Obama
+a strategy republican for obstruct of reelection the Obama
+strategy a republican for obstructing the reelection of Obama
+a strategy republican to obstruct the reelection Obama &apos;s
+a strategy republican for obstruct the of re @-@ election Obama
+a strategy republican for hinder reelection for Obama
+a strategy republican for hamper &apos;s re @-@ election Obama
+a strategy republican for the hinder reelection Obama of
+a strategy republican hinder for the reelection Obama of
+a strategy republican for hinder of the re @-@ election Obama
+a strategy republican for the reelection hinder Obama
+a strategy republican to obstruct the reelection for Obama
+a strategy republican for hinder reelection Obama the of
+a strategy republican for obstructing the of reelection Obama
+a strategy republican for obstruct of Obama reelection the
+strategy a republican for the hinder reelection of Obama
+strategy a republican hinder for the reelection of Obama
+a strategy republican obstruct the reelection of Obama for
+a strategy republican for the reelection of hinder Obama
+an strategy republican for obstruct the reelection Obama of
+a strategy republican for hamper reelection from Obama
+an strategy republican hamper for the re @-@ election of Obama
+strategy a republican for hinder reelection the of Obama
+a strategy republican to hinder the reelection Obama &apos;s
+an strategy republican for obstruct reelection of the Obama
+strategy an republican for hamper the re @-@ election of Obama
+a strategy republican hinder for the of reelection Obama
+a strategy republican to obstruct reelection the Obama
+a strategy republican to obstruct the Obama of reelection
+a strategy republican to hinder the reelection for Obama
+a strategy republican for hamper the re @-@ election Obama of
+a strategy republican for hindering the re @-@ election Obama
+a strategy republican to obstruct the Obama reelection
+an strategy republican for obstruct the of reelection Obama
+a strategy republican for hindering of the reelection Obama
+a strategy republican to obstruct reelection of Obama the
+strategy a republican for hinder reelection Obama
+an strategy republican for hinder the of Obama reelection
+one strategy republican for hamper reelection of Obama
+an strategy republican for hindering reelection of Obama
+an strategy republican hamper for the reelection of Obama
+one strategy republican for hindering the re @-@ election of Obama
+strategy a republican to obstruct the reelection Obama
+strategy an republican for hamper the reelection of Obama
+one strategy republican for obstruct the reelection Obama
+a strategy republican for hindering the reelection from Obama
+a strategy republican to the reelection obstruct of Obama
+a strategy republican for reelection of Obama hinder
+a strategy republican for hinder of Obama reelection
+an strategy republican for hamper reelection the of Obama
+strategy a republican obstruct for the re @-@ election of Obama
+a strategy republican for hamper re @-@ election of the Obama
+a strategy republican for obstruct reelection Obama the
+a strategy republican for obstruct reelection Obama &apos;s
+a strategy republican to hinder the Obama of reelection
+a strategy republican to hinder the Obama reelection
+a strategy for hinder the re @-@ election of Obama republican
+a strategy republican for obstruct reelection for Obama
+strategy a republican for hinder the reelection Obama of
+a strategy republican for hamper reelection the Obama of
+a strategy republican obstruct for the reelection Obama of
+a strategy republican for obstruct of the re @-@ election Obama
+one strategy republican for hindering the reelection of Obama

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz b/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
new file mode 120000
index 0000000..a6183d9
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/rules.1.gz
@@ -0,0 +1 @@
+../decode/rules.1.gz
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/phrase/unique-hypotheses/test.sh b/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
new file mode 100755
index 0000000..6b25957
--- /dev/null
+++ b/src/test/resources/decoder/phrase/unique-hypotheses/test.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+# Decode corpus.es with a single thread. Stdout goes to output, stderr to log.
+cat corpus.es | "$JOSHUA/bin/joshua-decoder" -threads 1 -c joshua.config > output 2> log
+
+# Pass only when output has exactly 300 distinct lines, the top-n in joshua.config.
+num=$(sort -u output | wc -l)
+if [ "$num" -eq 300 ]; then
+  rm -f output log
+  exit 0
+else
+  exit 1
+fi
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore b/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
new file mode 100644
index 0000000..d937c7f
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
@@ -0,0 +1,2 @@
+diff
+output

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/README b/src/test/resources/decoder/regexp-grammar-both-rule-types/README
new file mode 100644
index 0000000..226fa64
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/README
@@ -0,0 +1,16 @@
+This is a regression test for the case where an input matched *both* a regex
+rule and a non-regex rule (or two regex rules), but the correct regex rule did
+not win. If the code is right, changing the order of the rules in your grammar
+must not change the output translations.
+
+This test exercises the use of regular expressions in the grammar.  This is an
+experimental feature with an inefficient implementation in the decoder, but
+there are a number of things that could be done to make it more efficient if
+the technique proves useful.
+
+To enable it, you set the Joshua parameter
+
+  regexp-grammar = OWNER
+
+where OWNER is the owner of one or more grammars whose rules might be interpreted as regular
+expressions.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/config b/src/test/resources/decoder/regexp-grammar-both-rule-types/config
new file mode 100644
index 0000000..0fb4c0c
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/config
@@ -0,0 +1,9 @@
+tm = regexp regexp 10 ./regexp-grammar
+tm = thrax glue -1 ./glue-grammar
+mark-oovs = true
+goal-symbol = GOAL
+top-n = 3
+
+weights-file = weights
+
+feature-function = OOVPenalty

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar b/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/input
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/input b/src/test/resources/decoder/regexp-grammar-both-rule-types/input
new file mode 100644
index 0000000..5531876
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/input
@@ -0,0 +1,5 @@
+chica linda
+chicos lindos
+chicos lind?s
+1928371028
+192837102

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold b/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
new file mode 100644
index 0000000..c8edb86
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
@@ -0,0 +1,12 @@
+0 ||| girl feminine-singular-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -4.000
+0 ||| girl feminine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+0 ||| girl generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+1 ||| boys masculine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+1 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+1 ||| boys lindos_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
+2 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
+2 ||| boys lind?s_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
+2 ||| chicos_OOV generic-pretty ||| tm_regexp_0=-1.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -105.000
+3 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=-1.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -3.000
+3 ||| 1928371028_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
+4 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar b/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
new file mode 100644
index 0000000..c93dc80
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
@@ -0,0 +1,12 @@
+[X] ||| blah linda ||| feminine-singular-pretty blah ||| 1 0
+[X] ||| \d{10,} ||| really big number ||| 1 1
+[X] ||| lindo.* ||| masculine-pretty ||| 1 1
+[X] ||| linda.* ||| feminine-pretty ||| 1 1
+[X] ||| lind.* ||| generic-pretty ||| 1 2
+[X] ||| lindo ||| masculine-singular-pretty ||| 1 0
+[X] ||| linda ||| feminine-singular-pretty ||| 1 0
+[X] ||| chico ||| boy ||| 1 0
+[X] ||| chicos ||| boys ||| 1 0
+[X] ||| chica ||| girl ||| 1 0
+[X] ||| chicas ||| girls ||| 1 0
+[X] ||| grande ||| great ||| 1 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh b/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
new file mode 100755
index 0000000..d4b6436
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+# Run the decoder on input using config. Stdout goes to output, stderr to log.
+cat input | "$JOSHUA/bin/joshua-decoder" -m 1g -c config > output 2> log
+
+# Pass only if output matches output.gold. Otherwise keep all files and exit 1.
+if diff -u output output.gold > diff; then
+    rm -f output log diff
+    exit 0
+else
+    exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/weights b/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
new file mode 100644
index 0000000..a998939
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
@@ -0,0 +1,4 @@
+tm_regexp_0 1
+tm_regexp_1 1
+tm_glue_0 -1
+OOVPenalty 1

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/.gitignore
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/.gitignore b/src/test/resources/decoder/regexp-grammar/.gitignore
new file mode 100644
index 0000000..d937c7f
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/.gitignore
@@ -0,0 +1,2 @@
+diff
+output

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/README b/src/test/resources/decoder/regexp-grammar/README
new file mode 100644
index 0000000..df81a67
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/README
@@ -0,0 +1,10 @@
+This test exercises the use of regular expressions in the grammar.  This is an experimental feature with
+an inefficient implementation in the decoder, but there are a number of things that could be done to
+make it more efficient if the technique proves useful.
+
+To enable it, you set the Joshua parameter
+
+  regexp-grammar = OWNER
+
+where OWNER is the owner of one or more grammars whose rules might be interpreted as regular
+expressions.

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/config b/src/test/resources/decoder/regexp-grammar/config
new file mode 100644
index 0000000..526dba0
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/config
@@ -0,0 +1,11 @@
+tm = regexp regexp 10 ./regexp-grammar
+tm = thrax glue -1 ./glue-grammar
+mark-oovs = true
+goal-symbol = GOAL
+regexp-grammar = regexp
+
+weights-file = weights
+
+feature-function = OOVPenalty
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/glue-grammar b/src/test/resources/decoder/regexp-grammar/glue-grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/input
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/input b/src/test/resources/decoder/regexp-grammar/input
new file mode 100644
index 0000000..8cdf0f8
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/input
@@ -0,0 +1,4 @@
+chica linda
+chico lindo
+1928371028
+192837102

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/output.gold b/src/test/resources/decoder/regexp-grammar/output.gold
new file mode 100644
index 0000000..49c5ea4
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/output.gold
@@ -0,0 +1,4 @@
+0 ||| girl pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+1 ||| boy pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
+2 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -2.000
+3 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/regexp-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/regexp-grammar b/src/test/resources/decoder/regexp-grammar/regexp-grammar
new file mode 100644
index 0000000..6f6c57c
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/regexp-grammar
@@ -0,0 +1,6 @@
+[X] ||| lind.* ||| pretty ||| 1 1
+[X] ||| lindo ||| [boy version of pretty] ||| 10 0 
+[X] ||| linda ||| [girl version of pretty] ||| 10 0 
+[X] ||| chico ||| boy ||| 1 0
+[X] ||| chica ||| girl ||| 1 0
+[X] ||| \d{10,} ||| really big number ||| 1 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/test.sh b/src/test/resources/decoder/regexp-grammar/test.sh
new file mode 100755
index 0000000..3235bd4
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/test.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+cat input | $JOSHUA/bin/joshua-decoder -c config > output 2> log
+
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+  rm -rf output log diff
+	exit 0
+else
+	exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/regexp-grammar/weights
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/weights b/src/test/resources/decoder/regexp-grammar/weights
new file mode 100644
index 0000000..4782753
--- /dev/null
+++ b/src/test/resources/decoder/regexp-grammar/weights
@@ -0,0 +1,5 @@
+tm_regexp_0 1
+tm_regexp_1 1
+tm_glue_0 -1
+
+OOVPenalty 1

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/glue-grammar b/src/test/resources/decoder/rescoring/glue-grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/src/test/resources/decoder/rescoring/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/grammar.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/grammar.gz b/src/test/resources/decoder/rescoring/grammar.gz
new file mode 100644
index 0000000..6708c0d
Binary files /dev/null and b/src/test/resources/decoder/rescoring/grammar.gz differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/input.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/input.txt b/src/test/resources/decoder/rescoring/input.txt
new file mode 100644
index 0000000..5562a01
--- /dev/null
+++ b/src/test/resources/decoder/rescoring/input.txt
@@ -0,0 +1,2 @@
+el nino tomo la cucaracha |||  ||| the boy ate the cockroach
+el nino tomo la cucaracha |||  ||| the big storm swarmed the coast ||| the big storm only swarmed the coast

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/joshua.config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/joshua.config b/src/test/resources/decoder/rescoring/joshua.config
new file mode 100644
index 0000000..0e4a277
--- /dev/null
+++ b/src/test/resources/decoder/rescoring/joshua.config
@@ -0,0 +1,31 @@
+rescore-forest = true
+rescore-forest-weight = 100
+
+lm = kenlm 5 false false 100 ../constrained/lm.gz
+
+tm = thrax pt 12 grammar.gz
+tm = thrax glue -1 glue-grammar
+
+mark-oovs = true
+
+default-non-terminal = X
+goalSymbol = GOAL
+
+#pruning config
+pop-limit = 100
+
+#output-format = %i %c %s
+
+#nbest config
+use_unique_nbest = true
+top_n = 2
+
+feature-function = WordPenalty
+feature-function = OOVPenalty
+
+lm_0 1.2373676802179452
+
+tm_pt_0 1
+tm_glue_0 1
+WordPenalty -1
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/output.gold b/src/test/resources/decoder/rescoring/output.gold
new file mode 100644
index 0000000..5d6600d
--- /dev/null
+++ b/src/test/resources/decoder/rescoring/output.gold
@@ -0,0 +1,12 @@
+0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+0 ||| the kid eated the cockroach ||| tm_pt_0=-15.000 tm_glue_0=5.000 lm_0=-20.053 WordPenalty=-3.040 OOVPenalty=0.000 ||| -31.773
+0 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+0 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240
+1 ||| the boy ate the cockroach ||| tm_pt_0=-6.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -19.240
+1 ||| the boy eated the cockroach ||| tm_pt_0=-11.000 tm_glue_0=5.000 lm_0=-17.198 WordPenalty=-3.040 OOVPenalty=0.000 ||| -24.240

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/rescoring/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/rescoring/test.sh b/src/test/resources/decoder/rescoring/test.sh
new file mode 100755
index 0000000..58f2d2d
--- /dev/null
+++ b/src/test/resources/decoder/rescoring/test.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+set -u
+
+cat input.txt | $JOSHUA/bin/joshua-decoder -m 1g -threads 1 -c joshua.config > output 2> log
+
+# Compare
+diff -u output output.gold > diff
+
+if [ $? -eq 0 ]; then
+	rm -f diff log output 
+	exit 0
+else
+	exit 1
+fi

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/segment-oovs/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/config b/src/test/resources/decoder/segment-oovs/config
new file mode 100644
index 0000000..0541bee
--- /dev/null
+++ b/src/test/resources/decoder/segment-oovs/config
@@ -0,0 +1,41 @@
+tm = thrax pt 1 ../../lattice/grammar.test
+tm = thrax glue -1 ../../lattice/glue-grammar
+
+#lm config
+lm = kenlm 3 false false 100 ../../lattice/test.lm
+
+#tm config
+default_non_terminal=X
+goalSymbol=GOAL
+
+#pruning config
+pop-limit = 100
+
+#nbest config
+use_unique_nbest = true 
+include-align-index = false
+top_n = 300
+
+# this shouldn't apply to the lattice
+maxlen = 1
+
+feature-function = OOVPenalty
+feature-function = WordPenalty
+feature-function = SourcePath
+
+###### model weights
+#lm order weight
+lm_0 0.0
+
+#phrasemodel owner column(0-indexed) weight
+tm_pt_0 0.2
+tm_pt_1 0.3
+tm_pt_2 0.5
+
+tm_glue_0 0.0
+
+#wordpenalty weight
+WordPenalty -1.0
+SourcePath 1.0
+
+OOVPenalty 1.0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/test/resources/decoder/segment-oovs/input.txt
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/segment-oovs/input.txt b/src/test/resources/decoder/segment-oovs/input.txt
new file mode 100644
index 0000000..01f142f
--- /dev/null
+++ b/src/test/resources/decoder/segment-oovs/input.txt
@@ -0,0 +1 @@
+ein golfloch