You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/14 09:11:55 UTC
[33/43] incubator-joshua git commit: Make ChartState start at index 1.
Make ChartState start at index 1.
Fixes a bug where ChartState id 0 was being confused with vocab id 0, aka <unk>.
The sign bit distinguishes a word from a ChartState id.
Written by @kpu on Kellen's laptop.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/bdd670bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/bdd670bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/bdd670bd
Branch: refs/heads/7
Commit: bdd670bd0554a73c0de0db5383e07ce5e8df586f
Parents: 0252942
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 13 17:28:51 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 13 17:46:50 2016 +0200
----------------------------------------------------------------------
jni/kenlm_wrap.cc | 5 +++--
src/test/java/org/apache/joshua/system/KenLmTest.java | 3 +--
2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bdd670bd/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index bbe6e7c..bd82fe4 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -92,11 +92,12 @@ class Chart {
if (!ins.second) {
vec_.pop_back();
}
- return *ins.first;
+ return *ins.first + 1; // +1 so that the first id is 1, not 0. We use sign bit to
+ // distinguish ChartState from vocab id.
}
const lm::ngram::ChartState &InterpretState(StateIndex index) const {
- return vec_[index];
+ return vec_[index - 1];
}
private:
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bdd670bd/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 003b5d9..9f26f8f 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -80,7 +80,6 @@ public class KenLmTest {
// THEN
assertEquals("ngram probabilities differ for word and id based n-gram query", prob_string, prob_id,
Float.MIN_VALUE);
-
}
@Test
@@ -106,7 +105,7 @@ public class KenLmTest {
// THEN
assertThat(result, is(notNullValue()));
- assertThat(result.state.getState(), is(0L));
+ assertThat(result.state.getState(), is(1L));
assertThat(result.prob, is(-3.7906885f));
}