You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/26 23:55:43 UTC

[7/9] incubator-joshua git commit: Update existing unit tests to perform proper decoder/vocab cleanup. Also added two core tests to UnitTests to test phrase-based decoding and kbest extraction.

Update existing unit tests to perform proper decoder/vocab cleanup. Also added two core tests to UnitTests to test phrase-based decoding and kbest extraction.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/7fdc4cd7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/7fdc4cd7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/7fdc4cd7

Branch: refs/heads/master
Commit: 7fdc4cd72df2f4b94712c222202a458b5dc3361a
Parents: b2ec94f
Author: Felix Hieber <fh...@amazon.com>
Authored: Fri Nov 20 16:24:04 2015 +0530
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Mon Apr 25 19:47:45 2016 -0700

----------------------------------------------------------------------
 resources/kbest_extraction/glue-grammar         |    3 +
 resources/kbest_extraction/grammar              |   25 +
 resources/kbest_extraction/joshua.config        |   27 +
 resources/kbest_extraction/lm.gz                |  Bin 0 -> 2466496 bytes
 resources/kbest_extraction/output.gold          | 3126 ++++++++++++++++++
 resources/kbest_extraction/output.scores.gold   | 3126 ++++++++++++++++++
 resources/phrase_decoder/config                 |   29 +
 resources/phrase_decoder/constrained.config     |   28 +
 .../phrase_decoder/constrained.output.gold      |    5 +
 resources/phrase_decoder/lm.1.gz                |  Bin 0 -> 2235 bytes
 resources/phrase_decoder/output.gold            |    1 +
 resources/phrase_decoder/rules.1.gz             |  Bin 0 -> 2998042 bytes
 .../kbest_extraction/KBestExtractionTest.java   |   62 +
 .../ConstrainedPhraseDecodingTest.java          |   59 +
 .../phrase/decode/PhraseDecodingTest.java       |   59 +
 tst/joshua/system/AlignmentMapTest.java         |    1 +
 tst/joshua/system/KenLmTest.java                |   48 +
 .../system/MultithreadedTranslationTests.java   |    2 -
 18 files changed, 6599 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fdc4cd7/resources/kbest_extraction/glue-grammar
----------------------------------------------------------------------
diff --git a/resources/kbest_extraction/glue-grammar b/resources/kbest_extraction/glue-grammar
new file mode 100644
index 0000000..6a1162f
--- /dev/null
+++ b/resources/kbest_extraction/glue-grammar
@@ -0,0 +1,3 @@
+[GOAL] ||| <s> ||| <s> ||| 0
+[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
+[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fdc4cd7/resources/kbest_extraction/grammar
----------------------------------------------------------------------
diff --git a/resources/kbest_extraction/grammar b/resources/kbest_extraction/grammar
new file mode 100644
index 0000000..a03b2d9
--- /dev/null
+++ b/resources/kbest_extraction/grammar
@@ -0,0 +1,25 @@
+[X] ||| a ||| A ||| 2 
+[X] ||| a ||| B ||| 3
+[X] ||| a ||| C ||| 5
+[X] ||| a ||| D ||| 7
+[X] ||| a ||| E ||| 11
+[X] ||| b ||| A ||| 13
+[X] ||| b ||| B ||| 17
+[X] ||| b ||| C ||| 19
+[X] ||| b ||| D ||| 23
+[X] ||| b ||| E ||| 29
+[X] ||| c ||| A ||| 31
+[X] ||| c ||| B ||| 37
+[X] ||| c ||| C ||| 41
+[X] ||| c ||| D ||| 43
+[X] ||| c ||| E ||| 47
+[X] ||| d ||| A ||| 53
+[X] ||| d ||| B ||| 59
+[X] ||| d ||| C ||| 61
+[X] ||| d ||| D ||| 67
+[X] ||| d ||| E ||| 71
+[X] ||| e ||| A ||| 73
+[X] ||| e ||| B ||| 79
+[X] ||| e ||| C ||| 83
+[X] ||| e ||| D ||| 89
+[X] ||| e ||| E ||| 97

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fdc4cd7/resources/kbest_extraction/joshua.config
----------------------------------------------------------------------
diff --git a/resources/kbest_extraction/joshua.config b/resources/kbest_extraction/joshua.config
new file mode 100644
index 0000000..cdab98e
--- /dev/null
+++ b/resources/kbest_extraction/joshua.config
@@ -0,0 +1,27 @@
+feature-function = StateMinimizingLanguageModel -lm_type kenlm -lm_order 5 -lm_file resources/kbest_extraction/lm.gz
+
+tm = thrax -owner pt -maxspan 12 -path resources/kbest_extraction/grammar
+tm = thrax -owner glue -maxspan -1 -path resources/kbest_extraction/glue-grammar
+
+mark_oovs=false
+
+#tm config
+default_non_terminal=X
+goalSymbol=GOAL
+
+#pruning config
+pop-limit=100
+
+#nbest config
+use_unique_nbest=true
+top-n = 3126
+
+#feature_function = WordPenalty
+feature_function = OOVPenalty
+
+# Model Weights ####
+
+lm_0 1
+tm_pt_0 1
+tm_glue_0 1
+OOVPenalty 10000

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/7fdc4cd7/resources/kbest_extraction/lm.gz
----------------------------------------------------------------------
diff --git a/resources/kbest_extraction/lm.gz b/resources/kbest_extraction/lm.gz
new file mode 100644
index 0000000..a26335e
Binary files /dev/null and b/resources/kbest_extraction/lm.gz differ