You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/09/14 09:11:55 UTC

[33/43] incubator-joshua git commit: Make ChartState start at index 1.

Make ChartState start at index 1.

Fixes a bug where ChartState id 0 was being confused with vocab id 0 (i.e. <unk>).
The sign bit distinguishes a word from a ChartState id.
Written by @kpu on Kellen's laptop.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/bdd670bd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/bdd670bd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/bdd670bd

Branch: refs/heads/7
Commit: bdd670bd0554a73c0de0db5383e07ce5e8df586f
Parents: 0252942
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Sep 13 17:28:51 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Tue Sep 13 17:46:50 2016 +0200

----------------------------------------------------------------------
 jni/kenlm_wrap.cc                                     | 5 +++--
 src/test/java/org/apache/joshua/system/KenLmTest.java | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bdd670bd/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index bbe6e7c..bd82fe4 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -92,11 +92,12 @@ class Chart {
       if (!ins.second) {
         vec_.pop_back();
       }
-      return *ins.first;
+      return *ins.first + 1; // +1 so that the first id is 1, not 0.  We use sign bit to 
+                             // distinguish ChartState from vocab id.  
     }
 
     const lm::ngram::ChartState &InterpretState(StateIndex index) const {
-      return vec_[index];
+      return vec_[index - 1];
     }
 
   private:

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/bdd670bd/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/KenLmTest.java b/src/test/java/org/apache/joshua/system/KenLmTest.java
index 003b5d9..9f26f8f 100644
--- a/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/src/test/java/org/apache/joshua/system/KenLmTest.java
@@ -80,7 +80,6 @@ public class KenLmTest {
     // THEN
     assertEquals("ngram probabilities differ for word and id based n-gram query", prob_string, prob_id,
             Float.MIN_VALUE);
-
   }
 
   @Test
@@ -106,7 +105,7 @@ public class KenLmTest {
 
     // THEN
     assertThat(result, is(notNullValue()));
-    assertThat(result.state.getState(), is(0L));
+    assertThat(result.state.getState(), is(1L));
     assertThat(result.prob, is(-3.7906885f));
   }