You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/16 06:27:22 UTC

[66/66] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

JOSHUA-252 Make it possible to use Maven to build Joshua


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ab5bb42c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ab5bb42c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ab5bb42c

Branch: refs/heads/JOSHUA-252
Commit: ab5bb42c3a5067521e0ea3e842611ce54a726782
Parents: 7f824b4
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Sun May 15 23:31:01 2016 -0700
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Sun May 15 23:31:01 2016 -0700

----------------------------------------------------------------------
 .../org/apache/joshua/corpus/SymbolTable.java   | 330 ++++++++++++++++++
 .../joshua/decoder/ff/ArityPhrasePenalty.java   |  12 +
 .../joshua/decoder/ff/FeatureFunction.java      |  21 +-
 .../joshua/decoder/ff/LabelCombinationFF.java   |  12 +
 .../joshua/decoder/ff/LabelSubstitutionFF.java  |  12 +
 .../apache/joshua/decoder/ff/OOVPenalty.java    |  12 +
 .../apache/joshua/decoder/ff/PhraseModel.java   |  12 +
 .../apache/joshua/decoder/ff/PhrasePenalty.java |  12 +
 .../apache/joshua/decoder/ff/RuleCountBin.java  |  12 +
 .../org/apache/joshua/decoder/ff/RuleFF.java    |  12 +
 .../apache/joshua/decoder/ff/RuleLength.java    |   2 +-
 .../org/apache/joshua/decoder/ff/RuleShape.java |  12 +
 .../apache/joshua/decoder/ff/SourcePathFF.java  |  12 +
 .../apache/joshua/decoder/ff/TargetBigram.java  |  12 +
 .../apache/joshua/decoder/ff/WordPenalty.java   |  12 +
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |  12 +
 .../apache/joshua/decoder/ff/lm/AbstractLM.java | 133 ++++++++
 .../apache/joshua/decoder/ff/lm/ArpaFile.java   | 335 +++++++++++++++++++
 .../apache/joshua/decoder/ff/lm/ArpaNgram.java  |  73 ++++
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |  12 +
 .../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 332 ++++++++++++++++++
 .../decoder/ff/lm/buildin_lm/package-info.java  |  19 ++
 .../joshua/decoder/ff/phrase/Distortion.java    |  12 +
 .../ff/similarity/EdgePhraseSimilarityFF.java   |  12 +
 .../joshua/decoder/ff/tm/BilingualRule.java     | 167 +++++++++
 .../joshua/decoder/ff/tm/MonolingualRule.java   | 315 +++++++++++++++++
 .../java/org/apache/joshua/lattice/Lattice.java | 106 +++++-
 .../java/org/apache/joshua/metrics/BLEU.java    |  70 ++--
 .../org/apache/joshua/metrics/BLEU_SBP.java     |   4 +-
 .../apache/joshua/metrics/GradeLevelBLEU.java   |  18 +-
 .../joshua/metrics/MinimumChangeBLEU.java       |   8 +-
 .../java/org/apache/joshua/metrics/Precis.java  |  26 +-
 .../org/apache/joshua/metrics/SourceBLEU.java   |   2 +-
 .../util/quantization/BooleanQuantizer.java     |  45 +++
 .../joshua/util/quantization/Quantizer.java     |  45 +++
 .../quantization/QuantizerConfiguration.java    | 119 +++++++
 .../util/quantization/QuantizerFactory.java     |  50 +++
 .../util/quantization/StatelessQuantizer.java   |  38 +++
 .../joshua/util/quantization/package-info.java  |  19 ++
 .../apache/joshua/corpus/CorpusArrayTest.java   | 304 +++++++++--------
 .../apache/joshua/corpus/VocabularyTest.java    |   2 -
 .../joshua/corpus/vocab/VocabularyTest.java     | 110 +++---
 .../joshua/decoder/DecoderThreadTest.java       |  65 ++--
 .../decoder/ff/ArityPhrasePenaltyFFTest.java    | 128 +++----
 .../joshua/decoder/ff/lm/ArpaFileTest.java      |  48 +--
 .../org/apache/joshua/packed/CountRules.java    |   2 +-
 .../org/apache/joshua/packed/PrintRules.java    |   6 +-
 .../org/apache/joshua/packed/VocabTest.java     |   3 +-
 .../system/MultithreadedTranslationTests.java   |  48 ++-
 .../system/StructuredTranslationTest.java       |  12 +-
 .../org/apache/joshua/util/io/BinaryTest.java   |   7 +-
 .../java/org/apache/joshua/zmert/BLEUTest.java  |  10 +-
 52 files changed, 2786 insertions(+), 428 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/corpus/SymbolTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/SymbolTable.java b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
new file mode 100644
index 0000000..d8b1694
--- /dev/null
+++ b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.corpus; 
+ 
+import java.util.Collection; 
+ 
+/**
+ * Represents a symbol table capable of mapping between strings and 
+ * symbols. 
+ *  
+ * @author Lane Schwartz 
+ * @author Zhifei Li 
+ * @version $LastChangedDate: 2009-11-24 23:07:43 -0600 (Tue, 24 Nov 2009) $ 
+ */ 
+public interface SymbolTable { 
+ 
+ //TODO Remove all hard-coded references to nonterminals 
+  
+ /**
+  * The unknown word's ID will be the size of the vocabulary, 
+  * ensuring that it is outside of the vocabulary. Note that 
+  * for vocabularies which have not been fixed yet, this 
+  * means the actual value is volatile and therefore a word 
+  * ID can only be compared against UNKNOWN_WORD at the time 
+  * the word ID is generated (otherwise unknown words can 
+  * become "known" if new words are added to the vocabulary 
+  * before testing). 
+  * <p> 
+  * Negative IDs are reserved for non-terminals. 
+  * 
+  * One (the value of this constant) is reserved as the UNKNOWN_WORD. 
+  */ 
+ int UNKNOWN_WORD = 1; 
+  
+ /** String representation for out-of-vocabulary words. */ 
+ String UNKNOWN_WORD_STRING = "<unk>"; 
+  
+ /**
+  * Integer representation of the bare (non-indexed) nonterminal X, 
+  * which represents a wild-card gap in a phrase. 
+  * <p> 
+  * All nonterminals are guaranteed to be represented by negative integers. 
+  */ 
+ int X = -1; 
+  
+ /**
+  * String representation of the bare (non-indexed) nonterminal X, 
+  * which represents a wild-card gap in a phrase. 
+  */ 
+ String X_STRING = "[X]"; 
+  
+  
+  
+ /**
+  * String representation of the nonterminal X with index 1, 
+  * which represents a wild-card gap in a phrase. 
+  */ 
+ String X1_STRING = "[X,1]"; 
+  
+  
+  
+ /**
+  * String representation of the nonterminal X with index 2, 
+  * which represents a wild-card gap in a phrase. 
+  */ 
+ String X2_STRING = "[X,2]";  
+  
+ /**
+  * Integer representation of the nonterminal S. 
+  * <p> 
+  * All nonterminals are guaranteed to be represented by negative integers. 
+  */ 
+ int S = -4; 
+  
+ /**
+  * String representation of the nonterminal S. 
+  */ 
+ String S_STRING = "[S]";  
+  
+ /**
+  * Integer representation of the nonterminal S with index 1, 
+  * which represents a wild-card gap in a phrase. 
+  * <p> 
+  * All nonterminals are guaranteed to be represented by negative integers. 
+  */ 
+ int S1 = -5; 
+  
+ /**
+  * String representation of the nonterminal S with index 1, 
+  * which represents a wild-card gap in a phrase. 
+  */ 
+ String S1_STRING = "[S,1]";  
+  
+ /**
+  * Gets a unique integer identifier for the nonterminal. 
+  * <p> 
+  * The integer returned is guaranteed to be a negative number. 
+  *  
+  * If the nonterminal is {@link #X_STRING}, 
+  * then the value returned must be {@link #X}. 
+  *  
+  * Otherwise, the value returned must be a negative number  
+  * whose value is less than {@link #X}. 
+  *  
+  * @param nonterminal Nonterminal symbol 
+  * @return a unique integer identifier for the nonterminal 
+  */ 
+ int addNonterminal(String nonterminal); 
+  
+ /**
+  * Gets a unique integer identifier for the terminal. 
+  *  
+  * @param terminal Terminal symbol 
+  * @return a unique integer identifier for the terminal 
+  */ 
+ int addTerminal(String terminal); 
+  
+ /**
+  * Gets the unique integer identifiers for the words. 
+  *  
+  * @param words Array of symbols 
+  * @return the unique integer identifiers for the words 
+  */ 
+ int[] addTerminals(String[] words); 
+  
+ /**
+  * Gets the unique integer identifiers for the words 
+  * in the sentence. 
+  *  
+  * @param sentence Space-delimited string of symbols 
+  * @return the unique integer identifiers for the words 
+  *         in the sentence 
+  */ 
+ int[] addTerminals(String sentence); 
+  
+ /**
+  * Gets an integer identifier for the word. 
+  * <p> 
+  * If the word is in the vocabulary, the integer returned 
+  * will uniquely identify that word. 
+  * <p> 
+  * If the word is not in the vocabulary, the integer returned 
+  * by <code>getUnknownWordID</code> may be returned. 
+  *  
+  * Alternatively, implementations may, if they choose, add 
+  * unknown words and assign them a symbol ID instead of 
+  * returning <code>getUnknownWordID</code>. 
+  *  
+  * @see #getUnknownWordID 
+  * @return the unique integer identifier for wordString,  
+  *         or the result of <code>getUnknownWordID</code>  
+  *         if wordString is not in the vocabulary 
+  */ 
+ int getID(String wordString); 
+  
+ /**
+  * Gets the integer identifiers for all words in the provided 
+  * sentence. 
+  * <p> 
+  * The sentence will be split (on spaces) into words, then 
+  * the integer identifier for each word will be retrieved 
+  * using <code>getID</code>. 
+  *  
+  * @see #getID(String) 
+  * @param sentence String of words, separated by spaces. 
+  * @return Array of integer identifiers for each word in 
+  *         the sentence 
+  */ 
+ int[] getIDs(String sentence); 
+  
+ /**
+  * Gets the String that corresponds to the specified integer 
+  * identifier. 
+  * <p> 
+  * If the identifier is in the symbol vocabulary, the String 
+  * returned will correspond to that identifier. 
+  *  
+  * Otherwise, the String returned by <code>getUnknownWord</code> 
+  * will be returned. 
+  * 
+  * @return the String that corresponds to the specified 
+  *         integer identifier, or the result of 
+  *         <code>getUnknownWord</code> if the identifier 
+  *         does not correspond to a word in the vocabulary 
+  */ 
+ String getTerminal(int wordID); 
+  
+ /**
+  * Gets the String that corresponds to the specified integer 
+  * identifier. 
+  * <p> 
+  * This method can be called for terminals or nonterminals. 
+  * 
+  * @param tokenID Integer identifier 
+  * @return the String that corresponds to the specified 
+  *         integer identifier 
+  */ 
+ String getWord(int tokenID); 
+  
+ /**
+  * Gets the String that corresponds to the sequence of 
+  * specified integer identifiers. 
+  * 
+  * @param ids Sequence of integer identifiers 
+  * @return the String that corresponds to the sequence of 
+  *         specified integer identifiers 
+  */ 
+ String getWords(int[] ids); 
+  
+ /**
+  *  
+  * @param wordIDs 
+  * @return 
+  */ 
+ String getTerminals(int[] wordIDs); 
+  
+ /**
+  * Gets a collection over all symbol identifiers for the 
+  * vocabulary. 
+  * 
+  * @return a collection over all symbol identifiers for the 
+  *         vocabulary 
+  */ 
+ Collection<Integer> getAllIDs(); 
+  
+ /**
+  * Gets the list of all words represented by this vocabulary. 
+  * 
+  * @return the list of all words represented by this 
+  *         vocabulary 
+  */ 
+ Collection<String> getWords(); 
+  
+ /**
+  * Gets the number of unique words in the vocabulary. 
+  * 
+  * @return the number of unique words in the vocabulary. 
+  */ 
+ int size(); 
+  
+ /**
+  * Gets the integer symbol representation of the unknown 
+  * word. 
+  * 
+  * @return the integer symbol representation of the unknown 
+  *         word. 
+  */ 
+ int getUnknownWordID(); 
+  
+ /**
+  * Gets the string representation of the unknown word. 
+  * 
+  * @return the string representation of the unknown word. 
+  */ 
+ String getUnknownWord(); 
+  
+ /**
+  * Returns <code>true</code> if the symbol id represents a 
+  * nonterminal, <code>false</code> otherwise. 
+  *  
+  * @param id 
+  * @return <code>true</code> if the symbol id represents a 
+  *         nonterminal, <code>false</code> otherwise. 
+  */ 
+ boolean isNonterminal(int id); 
+  
+ /**
+  * Gets the lowest-valued allowable terminal symbol id in 
+  * this table. 
+  * 
+  * @return the lowest-valued allowable terminal symbol id 
+  *         in this table. 
+  */ 
+ int getLowestID(); 
+ 
+  
+ /**
+  * Gets the highest-valued allowable terminal symbol id in 
+  * this table. 
+  * <p> 
+  * NOTE: This may or may not return the same value as 
+  * <code>size</code>. 
+  * 
+  * @return the highest-valued allowable terminal symbol id 
+  *         in this table. 
+  */ 
+ int getHighestID(); 
+  
+ /**
+  *  
+  *  
+  * @param id 
+  * @return 
+  */ 
+ int getTargetNonterminalIndex(int id);//first convert id to its String mapping, then call the function below 
+  
+ /**
+  *  
+  *  
+  * @param word 
+  * @return 
+  */ 
+ int getTargetNonterminalIndex(String word); 
+  
+ /**
+  *  
+  *  
+  * @param wordIDs 
+  * @param ntIndexIncrements 
+  * @return 
+  */ 
+ String getWords(int[] wordIDs, boolean ntIndexIncrements); 
+  
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
index bb57a6e..25f363d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/ArityPhrasePenalty.java
@@ -69,4 +69,16 @@ public class ArityPhrasePenalty extends StatelessFF {
     
     return null;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index fc1e15b..c6112e5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -72,7 +72,7 @@ public abstract class FeatureFunction {
    * names, for templates that define multiple features.
    */
   protected String name = null;
-  
+
   /*
    * The list of features each function can contribute, along with the dense feature IDs.
    */
@@ -93,14 +93,14 @@ public abstract class FeatureFunction {
    * instantiated
    */
   protected FeatureVector weights;
-  
+
   /* The config */
   protected JoshuaConfiguration config;
 
   public String getName() {
     return name;
   }
-  
+
   // Whether the feature has state.
   public abstract boolean isStateful();
 
@@ -112,7 +112,7 @@ public abstract class FeatureFunction {
 
     this.parsedArgs = FeatureFunction.parseArgs(args);
   }
-  
+
   /**
    * Any feature function can use this to report dense features names to the master code. The 
    * parameter tells the feature function the index of the first available dense feature ID; the feature
@@ -304,6 +304,15 @@ public abstract class FeatureFunction {
   }
 
   /**
+   * It is used when initializing translation grammars (for 
+   * pruning purpose, and to get stateless logP for each rule). 
+   * This is also required to sort the rules (required by Cube-pruning). 
+   */ 
+  public abstract double estimateLogP(Rule rule, int sentID);
+  
+  public abstract double  getWeight(); 
+
+  /**
    * Accumulator objects allow us to generalize feature computation.
    * ScoreAccumulator takes (feature,value) pairs and simple stores the weighted
    * sum (for decoding). FeatureAccumulator records the named feature values
@@ -326,7 +335,7 @@ public abstract class FeatureFunction {
     public void add(String name, float value) {
       score += value * weights.getSparse(name);
     }
-    
+
     @Override
     public void add(int id, float value) {
       score += value * weights.getDense(id);
@@ -348,7 +357,7 @@ public abstract class FeatureFunction {
     public void add(String name, float value) {
       features.increment(name, value);
     }
-    
+
     @Override
     public void add(int id, float value) {
       features.increment(id,  value);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java b/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
index 1c02853..f80e0b7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
@@ -60,4 +60,16 @@ public class LabelCombinationFF extends StatelessFF {
     return null;
   }
 
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java b/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
index fb64b26..2c247fe 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
@@ -129,4 +129,16 @@ public class LabelSubstitutionFF extends StatelessFF {
     return null;
   }
 
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 96999c2..0d0e0f7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -102,4 +102,16 @@ public class OOVPenalty extends StatelessFF {
   private float getValue(int lhs) {
     return oovWeights.containsKey(lhs) ? oovWeights.get(lhs) : defaultValue;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index 120ab4b..62792dc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -132,4 +132,16 @@ public class PhraseModel extends StatelessFF {
   public String toString() {
     return name + " " + Vocabulary.word(ownerID);
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 3c38e60..a185286 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -83,4 +83,16 @@ public class PhrasePenalty extends StatelessFF {
       return weights.getDense(denseFeatureIndex) * value;
     return 0.0f;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java b/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
index 4d99668..e75ea12 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleCountBin.java
@@ -67,4 +67,16 @@ public class RuleCountBin extends StatelessFF {
 
     return null;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
index 1ff6b80..bc6d67b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@ -85,4 +85,16 @@ public class RuleFF extends StatelessFF {
     }
     return ruleString.replaceAll("[ =]", "~");
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java b/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
index e02b12b..59b1c20 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
@@ -31,7 +31,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
  * This feature computes three feature templates: a feature indicating the length of the rule's
  * source side, its target side, and a feature that pairs them.
  */
-public class RuleLength extends StatelessFF {
+public abstract class RuleLength extends StatelessFF {
 
   public RuleLength(FeatureVector weights, String[] args, JoshuaConfiguration config) {
     super(weights, "RuleLength", args, config);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java b/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
index ac5ffa4..a514021 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
@@ -70,4 +70,16 @@ public class RuleShape extends StatelessFF {
 
     return null;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index 22eaa8f..d757303 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -60,4 +60,16 @@ public final class SourcePathFF extends StatelessFF {
     acc.add(denseFeatureIndex,  sourcePath.getPathCost());
     return null;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index 689df3c..5661ce7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -212,4 +212,16 @@ public class TargetBigram extends StatefulFF {
 
     return sb.substring(0, sb.length() - 1);
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
index 0063cc4..2a40088 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/WordPenalty.java
@@ -75,4 +75,16 @@ public final class WordPenalty extends StatelessFF {
       return weights.getDense(denseFeatureIndex) * OMEGA * (rule.getEnglish().length - rule.getArity());
     return 0.0f;
   }
+
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 8f474ac..e438778 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -353,4 +353,16 @@ public class FragmentLMFF extends StatefulFF {
     }
   }
 
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
new file mode 100644
index 0000000..79560fd
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/AbstractLM.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm; 
+
+import org.apache.joshua.decoder.JoshuaConfiguration; 
+import org.apache.joshua.decoder.Support; 
+import org.apache.joshua.corpus.SymbolTable; 
+
+
+import java.util.List; 
+
+/**
+ * This class implements NGramLanguageModel by creating wrappers 
+ * around the necessary functions to capture common errors. Most 
+ * methods are declared final, in an attempt to limit what subclasses 
+ * may be defined. 
+ * 
+ * @author Zhifei Li, <zh...@gmail.com> 
+ * @version $LastChangedDate: 2009-12-30 10:10:38 -0600 (Wed, 30 Dec 2009) $ 
+ */ 
+public abstract class AbstractLM extends DefaultNGramLanguageModel { 
+
+  public AbstractLM(int symbolTable, int order) { 
+    super(symbolTable, order); 
+  } 
+
+
+  public final double sentenceLogProbability( 
+      List<Integer> sentence, int order, int startIndex 
+      ) { 
+    //return super.sentenceLogProbability(sentence.stream().toArray(int[]::new) , order, startIndex); 
+    return (Double) null;
+  } 
+
+
+  public final float ngramLogProbability(int[] ngram) { 
+    return super.ngramLogProbability(ngram); 
+  } 
+
+
+  public final float ngramLogProbability(int[] ngram, int order) { 
+    if (ngram.length > order) { 
+      throw new RuntimeException("ngram length is greather than the max order"); 
+    } 
+    //  if (ngram.length==1 && "we".equals(symbolTable.getWord(ngram[0]))) { 
+    //   System.err.println("Something weird is about to happen"); 
+    //  } 
+
+    int historySize = ngram.length - 1; 
+    if (historySize >= order || historySize < 0) { 
+      // BUG: use logger or exception. Don't zero default 
+      throw new RuntimeException("Error: history size is " + historySize); 
+      //   return 0; 
+    } 
+    double probability = ngramLogProbability_helper(ngram, order); 
+//    if (probability < -JoshuaConfiguration.lm_ceiling_cost) { 
+//      probability = -JoshuaConfiguration.lm_ceiling_cost; 
+//    } 
+    return (float) probability; 
+  } 
+
+  protected abstract float ngramLogProbability_helper(int[] ngram, int order); 
+
+
+  /**
+   * @deprecated this function is much slower than the int[] 
+   *             version 
+   */ 
+  @Deprecated 
+  public final double logProbOfBackoffState(List<Integer> ngram, int order, int qtyAdditionalBackoffWeight) { 
+    return logProbabilityOfBackoffState( 
+        Support.subIntArray(ngram, 0, ngram.size()), 
+        order, qtyAdditionalBackoffWeight); 
+  } 
+
+
+  public final double logProbabilityOfBackoffState(int[] ngram, int order, int qtyAdditionalBackoffWeight) { 
+    if (ngram.length > order) { 
+      throw new RuntimeException("ngram length is greather than the max order"); 
+    } 
+    if (ngram[ngram.length-1] != LanguageModelFF.LM_INDEX) { 
+      throw new RuntimeException("last wrd is not <bow>"); 
+    } 
+    if (qtyAdditionalBackoffWeight > 0) { 
+      return logProbabilityOfBackoffState_helper( 
+          ngram, order, qtyAdditionalBackoffWeight); 
+    } else { 
+      return 0.0; 
+    } 
+  } 
+
+
+  protected abstract double logProbabilityOfBackoffState_helper( 
+      int[] ngram, int order, int qtyAdditionalBackoffWeight); 
+
+
+  // BUG: We should have different classes based on the configuration in use 
+  public int[] leftEquivalentState(int[] originalState, int order, 
+      double[] cost 
+      ) { 
+//    if (JoshuaConfiguration.use_left_equivalent_state) 
+//      throw new UnsupportedOperationException("getLeftEquivalentState is not overwritten by a concrete class"); 
+
+    return originalState; 
+  } 
+
+
+  // BUG: We should have different classes based on the configuration in use 
+  public int[] rightEquivalentState(int[] originalState, int order) { 
+//    if ( !JoshuaConfiguration.use_right_equivalent_state 
+//        || originalState.length != this.ngramOrder-1) { 
+      return originalState; 
+//    } else { 
+//      throw new UnsupportedOperationException("getRightEquivalentState is not overwritten by a concrete class"); 
+//    } 
+  } 
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaFile.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaFile.java b/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaFile.java
new file mode 100644
index 0000000..5e66afa
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaFile.java
@@ -0,0 +1,335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm; 
+
+import java.io.File; 
+import java.io.FileInputStream; 
+import java.io.FileNotFoundException; 
+import java.io.IOException; 
+import java.io.InputStream; 
+import java.util.Iterator; 
+import java.util.NoSuchElementException; 
+import java.util.Scanner; 
+import java.util.logging.Level; 
+import java.util.logging.Logger; 
+import java.util.regex.Matcher; 
+import java.util.regex.Pattern; 
+import java.util.zip.GZIPInputStream; 
+
+import org.apache.joshua.corpus.Vocabulary; 
+import org.apache.joshua.util.Regex; 
+import org.apache.joshua.util.io.LineReader; 
+
+/**
+ * Utility class for reading ARPA language model files. 
+ *  
+ * @author Lane Schwartz 
+ */ 
+public class ArpaFile implements Iterable<ArpaNgram> { 
+
+  /** Logger for this class. */ 
+  private static final Logger logger =  
+      Logger.getLogger(ArpaFile.class.getName()); 
+
+  /** Regular expression representing a blank line. */ 
+  public static final Regex BLANK_LINE  = new Regex("^\\s*$"); 
+
+  /** 
+   * Regular expression representing a line  
+   * starting a new section of n-grams in an ARPA language model file.  
+   */ 
+  public static final Regex NGRAM_HEADER = new Regex("^\\\\\\d-grams:\\s*$"); 
+
+  /** 
+   * Regular expression representing a line  
+   * ending an ARPA language model file.  
+   */ 
+  public static final Regex NGRAM_END = new Regex("^\\\\end\\\\s*$"); 
+
+  /** ARPA file for this object. */ 
+  private final File arpaFile; 
+
+  /** The vocabulary associated with this object. */ 
+  private final Vocabulary vocab; 
+
+  /**
+   * Constructs an object that represents an ARPA language model file. 
+   *  
+   * @param arpaFileName File name of an ARPA language model file 
+   * @param vocab Symbol table to be used by this object 
+   */ 
+  public ArpaFile(String arpaFileName, Vocabulary vocab) { 
+    this.arpaFile = new File(arpaFileName); 
+    this.vocab = vocab; 
+  } 
+
+  public ArpaFile(String arpaFileName) throws IOException { 
+    this.arpaFile = new File(arpaFileName); 
+    this.vocab = new Vocabulary(); 
+
+    //  final Scanner scanner = new Scanner(arpaFile); 
+
+    //  // Eat initial header lines 
+    //  while (scanner.hasNextLine()) { 
+    //   String line = scanner.nextLine(); 
+    //   logger.finest("Discarding line: " + line); 
+    //   if (NGRAM_HEADER.matches(line)) { 
+    //    break; 
+    //   } 
+    //  } 
+
+    //  int ngramOrder = 1; 
+
+    LineReader grammarReader = new LineReader(arpaFileName); 
+
+    try { 
+      for (String line : grammarReader) { 
+
+
+        //  while (scanner.hasNext()) { 
+        //    
+        //   String line = scanner.nextLine(); 
+
+        String[] parts = Regex.spaces.split(line); 
+        if (parts.length > 1) { 
+          String[] words = Regex.spaces.split(parts[1]); 
+
+          for (String word : words) { 
+            if (logger.isLoggable(Level.FINE)) logger.fine("Adding to vocab: " + word); 
+            Vocabulary.addAll(word);
+          } 
+
+        } else { 
+          logger.info(line); 
+        } 
+
+      } 
+    } finally {  
+      grammarReader.close();  
+    } 
+
+    //    
+    //   boolean lineIsHeader = NGRAM_HEADER.matches(line); 
+    //    
+    //   while (lineIsHeader || BLANK_LINE.matches(line)) { 
+    //     
+    //    if (lineIsHeader) { 
+    //     ngramOrder++; 
+    //    } 
+    //     
+    //    if (scanner.hasNext()) { 
+    //     line = scanner.nextLine().trim(); 
+    //     lineIsHeader = NGRAM_HEADER.matches(line); 
+    //    } else { 
+    //     logger.severe("Ran out of lines!"); 
+    //     return; 
+    //    } 
+    //   } 
+
+
+    //    
+    //   // Add word to vocab 
+    //   if (logger.isLoggable(Level.FINE)) logger.fine("Adding word to vocab: " + parts[ngramOrder]); 
+    //   vocab.addTerminal(parts[ngramOrder]); 
+    //    
+    //   // Add context words to vocab 
+    //   for (int i=1; i<ngramOrder; i++) { 
+    //    if (logger.isLoggable(Level.FINE)) logger.fine("Adding context word to vocab: " + parts[i]); 
+    //    vocab.addTerminal(parts[i]); 
+    //   } 
+
+    //  } 
+
+    logger.info("Done constructing ArpaFile"); 
+
+  } 
+
+  /**
+   * Gets the {@link org.apache.joshua.corpus.Vocabulary} 
+   * associated with this object. 
+   *  
+   * @return the symbol table associated with this object 
+   */ 
+  public Vocabulary getVocab() { 
+    return vocab; 
+  } 
+
+  /**
+   * Gets the total number of n-grams  
+   * in this ARPA language model file. 
+   *  
+   * @return total number of n-grams  
+   *         in this ARPA language model file 
+   */ 
+  @SuppressWarnings("unused") 
+  public int size() { 
+
+    logger.fine("Counting n-grams in ARPA file"); 
+    int count=0; 
+
+    for (ArpaNgram ngram : this) { 
+      count++; 
+    } 
+    logger.fine("Done counting n-grams in ARPA file"); 
+
+    return count; 
+  } 
+
+  public int getOrder() throws FileNotFoundException { 
+
+    Pattern pattern = Pattern.compile("^ngram (\\d+)=\\d+$"); 
+    if (logger.isLoggable(Level.FINEST)) logger.finest("Pattern is " + pattern.toString()); 
+    @SuppressWarnings("resource")
+    final Scanner scanner = new Scanner(arpaFile); 
+
+    int order = 0; 
+
+    // Eat initial header lines 
+    while (scanner.hasNextLine()) { 
+      String line = scanner.nextLine(); 
+
+      if (NGRAM_HEADER.matches(line)) { 
+        break; 
+      } else { 
+        Matcher matcher = pattern.matcher(line); 
+        if (matcher.matches()) { 
+          if (logger.isLoggable(Level.FINEST)) logger.finest("DOES   match: \'" + line + "\'"); 
+          order = Integer.valueOf(matcher.group(1)); 
+        } else if (logger.isLoggable(Level.FINEST)) { 
+          logger.finest("Doesn't match: \'" + line + "\'"); 
+        } 
+      } 
+    } 
+
+    return order; 
+  } 
+
+  /**
+   * Gets an iterator capable of iterating  
+   * over all n-grams in the ARPA file. 
+   *  
+   * @return an iterator capable of iterating  
+   *         over all n-grams in the ARPA file 
+   */ 
+  @SuppressWarnings("resource")
+  public Iterator<ArpaNgram> iterator() { 
+
+    try { 
+      final Scanner scanner; 
+
+      if (arpaFile.getName().endsWith("gz")) { 
+        InputStream in = new GZIPInputStream( 
+            new FileInputStream(arpaFile)); 
+        scanner = new Scanner(in); 
+      } else { 
+        scanner = new Scanner(arpaFile); 
+      } 
+
+      // Eat initial header lines 
+      while (scanner.hasNextLine()) { 
+        String line = scanner.nextLine(); 
+        logger.finest("Discarding line: " + line); 
+        if (NGRAM_HEADER.matches(line)) { 
+          break; 
+        } 
+      } 
+
+      return new Iterator<ArpaNgram>() { 
+
+        String nextLine = null; 
+        int ngramOrder = 1; 
+        //    int id = 0; 
+
+        public boolean hasNext() { 
+
+          if (scanner.hasNext()) { 
+
+            String line = scanner.nextLine(); 
+
+            boolean lineIsHeader = NGRAM_HEADER.matches(line) || NGRAM_END.matches(line); 
+
+            while (lineIsHeader || BLANK_LINE.matches(line)) { 
+
+              if (lineIsHeader) { 
+                ngramOrder++; 
+              } 
+
+              if (scanner.hasNext()) { 
+                line = scanner.nextLine().trim(); 
+                lineIsHeader = NGRAM_HEADER.matches(line) || NGRAM_END.matches(line); 
+              } else { 
+                nextLine = null; 
+                return false; 
+              } 
+            } 
+
+            nextLine = line; 
+            return true; 
+
+          } else { 
+            nextLine = null; 
+            return false; 
+          } 
+
+        } 
+
+        public ArpaNgram next() { 
+          if (nextLine!=null) { 
+
+            String[] parts = Regex.spaces.split(nextLine); 
+
+            float value = Float.valueOf(parts[0]); 
+
+            int word = Vocabulary.id(parts[ngramOrder]); 
+
+            int[] context = new int[ngramOrder-1]; 
+            for (int i=1; i<ngramOrder; i++) { 
+              context[i-1] = Vocabulary.id(parts[i]); 
+            } 
+
+            float backoff; 
+            if (parts.length > ngramOrder+1) { 
+              backoff = Float.valueOf(parts[parts.length-1]); 
+            } else { 
+              backoff = ArpaNgram.DEFAULT_BACKOFF; 
+            } 
+
+            nextLine = null; 
+            return new ArpaNgram(word, context, value, backoff); 
+
+          } else { 
+            throw new NoSuchElementException(); 
+          } 
+        } 
+
+        public void remove() { 
+          throw new UnsupportedOperationException(); 
+        } 
+
+      }; 
+    } catch (FileNotFoundException e) { 
+      logger.severe(e.toString()); 
+      return null; 
+    } catch (IOException e) { 
+      logger.severe(e.toString()); 
+      return null; 
+    } 
+
+  } 
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaNgram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaNgram.java b/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaNgram.java
new file mode 100644
index 0000000..d0077d1
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/ArpaNgram.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm; 
+
+/**
+ * Represents a single n-gram line  
+ * from an ARPA language model file. 
+ *  
+ * @author Lane Schwartz 
+ */ 
public class ArpaNgram {

  /** Probability value used to signal an invalid entry. */
  public static final float INVALID_VALUE = Float.NaN;

  /** Backoff weight assumed when a line carries no explicit backoff field. */
  public static final float DEFAULT_BACKOFF = 0.0f;

  /** ID of the word this entry scores. */
  private final int word;

  /** IDs of the conditioning context words. */
  private final int[] context;

  /** Log probability of the word given the context. */
  private final float value;

  /** Backoff weight associated with this n-gram. */
  private final float backoff;

  /**
   * Creates an entry representing one n-gram line of an ARPA file.
   *
   * @param word ID of the scored word
   * @param context IDs of the conditioning words
   * @param value log probability of the word given the context
   * @param backoff backoff weight for this n-gram
   */
  public ArpaNgram(int word, int[] context, float value, float backoff) {
    this.word = word;
    this.context = context;
    this.value = value;
    this.backoff = backoff;
  }

  /** @return the order of this n-gram, i.e. the context length plus one */
  public int order() {
    return 1 + context.length;
  }

  /** @return ID of the scored word */
  public int getWord() {
    return word;
  }

  /** @return IDs of the conditioning context words */
  public int[] getContext() {
    return context;
  }

  /** @return log probability of the word given the context */
  public float getValue() {
    return value;
  }

  /** @return backoff weight for this n-gram */
  public float getBackoff() {
    return backoff;
  }
}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index d69d552..f2daffd 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -517,4 +517,16 @@ public class LanguageModelFF extends StatefulFF {
   public static void resetLmIndex() {
     LM_INDEX = 0;
   }
+
  @Override
  public double estimateLogP(Rule rule, int sentID) {
    // Auto-generated stub: contributes a zero estimate unconditionally.
    // TODO(review): confirm a zero estimate satisfies the abstract contract
    // this override was generated for.
    return 0;
  }

  @Override
  public double getWeight() {
    // Auto-generated stub: reports a zero weight unconditionally.
    // TODO(review): confirm callers tolerate zero rather than the feature's
    // configured weight.
    return 0;
  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
new file mode 100644
index 0000000..654561c
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.buildin_lm;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.apache.joshua.corpus.SymbolTable;
+import org.apache.joshua.corpus.Vocabulary;
+import  org.apache.joshua.decoder.JoshuaConfiguration;
+import  org.apache.joshua.decoder.ff.lm.AbstractLM;
+import  org.apache.joshua.decoder.ff.lm.ArpaFile;
+import  org.apache.joshua.decoder.ff.lm.ArpaNgram;
+import  org.apache.joshua.util.Bits;
+import  org.apache.joshua.util.Regex;
+
+/**
+ * Relatively memory-compact language model
+ * stored as a reversed-word-order trie.
+ * <p>
+ * The trie itself represents language model context.
+ * <p>
+ * Conceptually, each node in the trie stores a map 
+ * from conditioning word to log probability.
+ * <p>
+ * Additionally, each node in the trie stores 
+ * the backoff weight for that context.
+ * 
+ * @author Lane Schwartz
+ * @see <a href="http://www.speech.sri.com/projects/srilm/manpages/ngram-discount.7.html">SRILM ngram-discount documentation</a>
+ */
+public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
+
+  /** Logger for this class. */
+  private static Logger logger =
+      Logger.getLogger(TrieLM.class.getName());
+
+  /**
+   * Node ID for the root node.
+   */
+  private static final int ROOT_NODE_ID = 0;
+
+
+  /** 
+   * Maps from (node id, word id for child) --> node id of child. 
+   */
+  private final Map<Long,Integer> children;
+
+  /**
+   * Maps from (node id, word id for lookup word) --> 
+   * log prob of lookup word given context 
+   * 
+   * (the context is defined by where you are in the tree).
+   */
+  private final Map<Long,Float> logProbs;
+
+  /**
+   * Maps from (node id) --> 
+   * backoff weight for that context 
+   * 
+   * (the context is defined by where you are in the tree).
+   */
+  private final Map<Integer,Float> backoffs;
+
+  public TrieLM(Vocabulary vocab, String file) throws FileNotFoundException {
+    this(new ArpaFile(file,vocab));
+  }
+
+  /**
+   * Constructs a language model object from the specified ARPA file.
+   * 
+   * @param arpaFile
+   * @throws FileNotFoundException 
+   */
+  public TrieLM(ArpaFile arpaFile) throws FileNotFoundException {
+    super(arpaFile.getVocab().size(), arpaFile.getOrder());
+
+    int ngramCounts = arpaFile.size();
+    if (logger.isLoggable(Level.FINE)) logger.fine("ARPA file contains " + ngramCounts + " n-grams");
+
+    this.children = new HashMap<Long,Integer>(ngramCounts);
+    this.logProbs = new HashMap<Long,Float>(ngramCounts);
+    this.backoffs = new HashMap<Integer,Float>(ngramCounts);
+
+    int nodeCounter = 0;
+
+    int lineNumber = 0;
+    for (ArpaNgram ngram : arpaFile) {
+      lineNumber += 1;
+      if (lineNumber%100000==0) logger.info("Line: " + lineNumber);
+
+      if (logger.isLoggable(Level.FINEST)) logger.finest(ngram.order() + "-gram: (" + ngram.getWord() + " | " + Arrays.toString(ngram.getContext()) + ")");
+      int word = ngram.getWord();
+
+      int[] context = ngram.getContext();
+
+      {
+        // Find where the log prob should be stored
+        int contextNodeID = ROOT_NODE_ID;
+        {
+          for (int i=context.length-1; i>=0; i--) {
+            long key = Bits.encodeAsLong(contextNodeID, context[i]);
+            int childID;
+            if (children.containsKey(key)) {
+              childID = children.get(key);
+            } else {
+              childID = ++nodeCounter;
+              if (logger.isLoggable(Level.FINEST)) logger.finest("children.put(" + contextNodeID + ":"+context[i] + " , " + childID + ")");
+              children.put(key, childID);
+            }
+            contextNodeID = childID;
+          }
+        }
+
+        // Store the log prob for this n-gram at this node in the trie
+        {
+          long key = Bits.encodeAsLong(contextNodeID, word);
+          float logProb = ngram.getValue();
+          if (logger.isLoggable(Level.FINEST)) logger.finest("logProbs.put(" + contextNodeID + ":"+word + " , " + logProb);
+          this.logProbs.put(key, logProb);
+        }
+      }
+
+      {
+        // Find where the backoff should be stored
+        int backoffNodeID = ROOT_NODE_ID;
+        { 
+          long backoffNodeKey = Bits.encodeAsLong(backoffNodeID, word);
+          int wordChildID;
+          if (children.containsKey(backoffNodeKey)) {
+            wordChildID = children.get(backoffNodeKey);
+          } else {
+            wordChildID = ++nodeCounter;
+            if (logger.isLoggable(Level.FINEST)) logger.finest("children.put(" + backoffNodeID + ":"+word + " , " + wordChildID + ")");
+            children.put(backoffNodeKey, wordChildID);
+          }
+          backoffNodeID = wordChildID;
+
+          for (int i=context.length-1; i>=0; i--) {
+            long key = Bits.encodeAsLong(backoffNodeID, context[i]);
+            int childID;
+            if (children.containsKey(key)) {
+              childID = children.get(key);
+            } else {
+              childID = ++nodeCounter;
+              if (logger.isLoggable(Level.FINEST)) logger.finest("children.put(" + backoffNodeID + ":"+context[i] + " , " + childID + ")");
+              children.put(key, childID);
+            }
+            backoffNodeID = childID;
+          }
+        }
+
+        // Store the backoff for this n-gram at this node in the trie
+        {
+          float backoff = ngram.getBackoff();
+          if (logger.isLoggable(Level.FINEST)) logger.finest("backoffs.put(" + backoffNodeID + ":" +word+" , " + backoff + ")");
+          this.backoffs.put(backoffNodeID, backoff);
+        }
+      }
+
+    }
+  }
+
+
+  @Override
+  protected double logProbabilityOfBackoffState_helper(
+      int[] ngram, int order, int qtyAdditionalBackoffWeight
+      ) {
+    throw new UnsupportedOperationException("probabilityOfBackoffState_helper undefined for TrieLM");
+  }
+
+  @Override
+  protected float ngramLogProbability_helper(int[] ngram, int order) {
+
+//    float logProb = (float) -JoshuaConfiguration.lm_ceiling_cost;//Float.NEGATIVE_INFINITY; // log(0.0f)
+    float backoff = 0.0f; // log(1.0f)
+
+    int i = ngram.length - 1;
+    int word = ngram[i];
+    i -= 1;
+
+    int nodeID = ROOT_NODE_ID;
+
+    while (true) {
+
+      {
+        long key = Bits.encodeAsLong(nodeID, word);
+        if (logProbs.containsKey(key)) {
+//          logProb = logProbs.get(key);
+          backoff = 0.0f; // log(0.0f)
+        }
+      }
+
+      if (i < 0) {
+        break;
+      }
+
+      {
+        long key = Bits.encodeAsLong(nodeID, ngram[i]);
+
+        if (children.containsKey(key)) {
+          nodeID = children.get(key);
+
+          backoff += backoffs.get(nodeID);
+
+          i -= 1;
+
+        } else {
+          break;
+        }
+      }
+
+    }
+
+//    double result = logProb + backoff;
+//    if (result < -JoshuaConfiguration.lm_ceiling_cost) {
+//      result = -JoshuaConfiguration.lm_ceiling_cost;
+//    }
+//
+//    return result;
+    return (Float) null;
+  }
+
+  public Map<Long,Integer> getChildren() {
+    return this.children;
+  }
+
+  public static void main(String[] args) throws IOException {
+
+    logger.info("Constructing ARPA file");
+    ArpaFile arpaFile = new ArpaFile(args[0]);
+
+    logger.info("Getting symbol table");
+    Vocabulary vocab = arpaFile.getVocab();
+
+    logger.info("Constructing TrieLM");
+    TrieLM lm = new TrieLM(arpaFile);
+
+    int n = Integer.valueOf(args[2]);
+    logger.info("N-gram order will be " + n);
+
+    Scanner scanner = new Scanner(new File(args[1]));
+
+    LinkedList<String> wordList = new LinkedList<String>();
+    LinkedList<String> window = new LinkedList<String>();
+
+    logger.info("Starting to scan " + args[1]);
+    while (scanner.hasNext()) {
+
+      logger.info("Getting next line...");
+      String line = scanner.nextLine();
+      logger.info("Line: " + line);
+
+      String[] words = Regex.spaces.split(line);
+      wordList.clear();
+
+      wordList.add("<s>");
+      for (String word : words) {
+        wordList.add(word);
+      }
+      wordList.add("</s>");
+
+      ArrayList<Integer> sentence = new ArrayList<Integer>();
+      //        int[] ids = new int[wordList.size()];
+      for (int i=0, size=wordList.size(); i<size; i++) {
+        sentence.add(vocab.id(wordList.get(i)));
+        //          ids[i] = ;
+      }
+
+
+
+      while (! wordList.isEmpty()) {
+        window.clear();
+
+        {
+          int i=0;
+          for (String word : wordList) {
+            if (i>=n) break;
+            window.add(word);
+            i++;
+          }
+          wordList.remove();
+        }
+
+        {
+          int i=0;
+          int[] wordIDs = new int[window.size()];
+          for (String word : window) {
+            wordIDs[i] = vocab.id(word);
+            i++;
+          }
+
+          logger.info("logProb " + window.toString() + " = " + lm.ngramLogProbability(wordIDs, n));
+        }
+      }
+
+      double logProb = lm.sentenceLogProbability(sentence, n, 2);//.ngramLogProbability(ids, n);
+      double prob = Math.exp(logProb);
+
+      logger.info("Total logProb = " + logProb);
+      logger.info("Total    prob = " + prob);
+    }
+
+  }
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/package-info.java b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/package-info.java
new file mode 100644
index 0000000..6c84703
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.lm.buildin_lm;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index cf0af8b..c9a3214 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -68,4 +68,16 @@ public class Distortion extends StatelessFF {
 
     return null;
   }
+
  @Override
  public double estimateLogP(Rule rule, int sentID) {
    // Auto-generated stub: contributes a zero estimate unconditionally.
    // TODO(review): confirm a zero estimate satisfies the abstract contract
    // this override was generated for.
    return 0;
  }

  @Override
  public double getWeight() {
    // Auto-generated stub: reports a zero weight unconditionally.
    // TODO(review): confirm callers tolerate zero rather than the feature's
    // configured weight.
    return 0;
  }
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
index 41cac0d..6ac6b42 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
@@ -274,4 +274,16 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
     return (count == 0 ? 0 : similarity / count);
   }
 
+  @Override
+  public double estimateLogP(Rule rule, int sentID) {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
+  @Override
+  public double getWeight() {
+    // TODO Auto-generated method stub
+    return 0;
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
new file mode 100644
index 0000000..6e35e2d
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/BilingualRule.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.tm; 
+
+import java.util.Arrays; 
+import java.util.Map; 
+
+import org.apache.joshua.corpus.SymbolTable; 
+
+
+/**
+ * Normally, the feature score in the rule should be *cost* (i.e., 
+ * -LogP), so that the feature weight should be positive 
+ * 
+ * @author Zhifei Li, <zh...@gmail.com> 
+ * @version $LastChangedDate: 2010-01-20 19:46:54 -0600 (Wed, 20 Jan 2010) $ 
+ */ 
+public class BilingualRule extends MonolingualRule { 
+
+  private int[] english; 
+
+  //=============================================================== 
+  // Constructors 
+  //=============================================================== 
+
+  /**
+   * Constructs a new rule using the provided parameters. The 
+   * owner and rule id for this rule are undefined. 
+   *  
+   * @param lhs Left-hand side of the rule. 
+   * @param sourceRhs Source language right-hand side of the rule. 
+   * @param targetRhs Target language right-hand side of the rule. 
+   * @param featureScores Feature value scores for the rule. 
+   * @param arity Number of nonterminals in the source language 
+   *              right-hand side. 
+   * @param owner 
+   * @param latticeCost 
+   * @param ruleID 
+   */ 
+  public BilingualRule(int lhs, int[] sourceRhs, int[] targetRhs, float[] featureScores, int arity, int owner, float latticeCost, int ruleID) { 
+    super(lhs, sourceRhs, featureScores, arity, owner, latticeCost, ruleID); 
+    this.english = targetRhs;   
+  } 
+
+  //called by class who does not care about lattice_cost, rule_id, and owner 
+  public BilingualRule(int lhs, int[] sourceRhs, int[] targetRhs, float[] featureScores, int arity) { 
+    super(lhs, sourceRhs, featureScores, arity); 
+    this.english = targetRhs; 
+  } 
+
+
+  //=============================================================== 
+  // Attributes 
+  //=============================================================== 
+
+  public final void setEnglish(int[] eng) { 
+    this.english = eng; 
+  } 
+
+  public final int[] getEnglish() { 
+    return this.english; 
+  } 
+
+
+  //=============================================================== 
+  // Serialization Methods 
+  //=============================================================== 
+  // TODO: remove these methods 
+
+  // Caching this method significantly improves performance 
+  // We mark it transient because it is, though cf java.io.Serializable 
+  private transient String cachedToString = null; 
+
+  public String toString(Map<Integer,String> ntVocab, SymbolTable sourceVocab, SymbolTable targetVocab) { 
+    if (null == this.cachedToString) { 
+      StringBuffer sb = new StringBuffer("["); 
+      sb.append(ntVocab.get(this.getLHS())); 
+      sb.append("] ||| "); 
+      sb.append(sourceVocab.getWords(this.getFrench(),true)); 
+      sb.append(" ||| "); 
+      sb.append(targetVocab.getWords(this.english,false)); 
+      //sb.append(java.util.Arrays.toString(this.english)); 
+      sb.append(" |||"); 
+      for (int i = 0; i < this.getFeatureScores().length; i++) { 
+        //    sb.append(String.format(" %.12f", this.getFeatureScores()[i])); 
+        sb.append(' '); 
+        sb.append(Float.toString(this.getFeatureScores()[i])); 
+      } 
+      this.cachedToString = sb.toString(); 
+    } 
+    return this.cachedToString; 
+  } 
+
+
+  //print the rule in terms of Integers 
+  public String toString() { 
+    if (null == this.cachedToString) { 
+      StringBuffer sb = new StringBuffer(); 
+      sb.append(this.getClass().getName() + "@" + Integer.toHexString(System.identityHashCode(this))); 
+      sb.append("~~~"); 
+      sb.append(this.getLHS()); 
+      sb.append(" ||| "); 
+      sb.append(Arrays.toString(this.getFrench())); 
+      sb.append(" ||| "); 
+      sb.append(Arrays.toString(this.english)); 
+      sb.append(" |||"); 
+      for (int i = 0; i < this.getFeatureScores().length; i++) { 
+        sb.append(String.format(" %.4f", this.getFeatureScores()[i])); 
+      } 
+      this.cachedToString = sb.toString(); 
+    } 
+    return this.cachedToString; 
+  } 
+
+
+  public String toString(SymbolTable symbolTable) { 
+    if (null == this.cachedToString) { 
+      StringBuffer sb = new StringBuffer(); 
+      sb.append(symbolTable.getWord(this.getLHS())); 
+      sb.append(" ||| "); 
+      sb.append(symbolTable.getWords(this.getFrench())); 
+      sb.append(" ||| "); 
+      sb.append(symbolTable.getWords(this.english)); 
+      sb.append(" |||"); 
+      for (int i = 0; i < this.getFeatureScores().length; i++) { 
+        sb.append(String.format(" %.4f", this.getFeatureScores()[i])); 
+      } 
+      this.cachedToString = sb.toString(); 
+    } 
+    return this.cachedToString; 
+  } 
+
+  public String toStringWithoutFeatScores(SymbolTable symbolTable) { 
+    StringBuffer sb = new StringBuffer(); 
+    if(symbolTable==null) 
+      sb.append(this.getLHS()); 
+    else 
+      sb.append(symbolTable.getWord(this.getLHS())); 
+
+    return sb.append(" ||| ") 
+        .append(convertToString(this.getFrench(), symbolTable)) 
+        .append(" ||| ") 
+        .append(convertToString(this.getEnglish(), symbolTable)) 
+        .toString(); 
+  } 
+
+
+
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
new file mode 100644
index 0000000..812e669
--- /dev/null
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/MonolingualRule.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.ff.tm; 
+
+import java.util.Arrays; 
+import java.util.List; 
+import java.util.Map; 
+import java.util.logging.Logger; 
+
+import org.apache.joshua.corpus.SymbolTable; 
+import org.apache.joshua.decoder.ff.FeatureFunction; 
+
+/**
+ * this class implements MonolingualRule 
+ * 
+ * @author Zhifei Li, <zh...@gmail.com> 
+ * @version $LastChangedDate: 2010-02-10 09:59:38 -0600 (Wed, 10 Feb 2010) $ 
+ */ 
+public class MonolingualRule extends Rule { 
+
+  private static final Logger logger = 
+      Logger.getLogger(MonolingualRule.class.getName()); 
+
+  //=============================================================== 
+  // Instance Fields 
+  //=============================================================== 
+
+  /* The string format of Rule is:
+   * [Phrase] ||| french ||| english ||| feature scores 
+   */ 
+  private int ruleID; 
+  private int lhs; // tag of this rule 
+  private int[] pFrench; //pointer to the RuleCollection, as all the rules under it share the same Source side 
+  private int arity; 
+  private float[] featScores; // the feature scores for this rule 
+
+  /* a feature function will be fired for this rule
+   * only if the owner of the rule matches the owner of the feature function 
+   */ 
+  private int owner; 
+
+  // TODO: consider remove this from the general class, and 
+  // create a new specific Rule class 
+  private float latticeCost;  
+
+  /**
+   * estimate_cost depends on rule itself: statelesscost + 
+   * transition_cost(non-stateless/non-contexual* models), 
+   * we need this variable in order to provide sorting for 
+   * cube-pruning 
+   */ 
+  private float est_cost = 0; 
+
+  //=============================================================== 
+  // Static Fields 
+  //=============================================================== 
+
+  // TODO: Ideally, we shouldn't have to have dummy rule IDs 
+  // and dummy owners. How can this need be eliminated? 
+  public static final int DUMMY_RULE_ID = 1; 
+  public static final int DUMMY_OWNER = 1; 
+
+
+  //=============================================================== 
+  // Constructors 
+  //=============================================================== 
+
+  /**
+   * Constructs a new rule using the provided parameters. The 
+   * owner and rule id for this rule are undefined. 
+   *  
+   * @param lhs Left-hand side of the rule. 
+   * @param sourceRhs Source language right-hand side of the rule. 
+   * @param featureScores Feature value scores for the rule. 
+   * @param arity Number of nonterminals in the source language 
+   *              right-hand side. 
+   * @param owner 
+   * @param latticeCost 
+   * @param ruleID 
+   */ 
+  public MonolingualRule(int lhs, int[] sourceRhs, float[] featureScores, int arity, int owner, float latticeCost, int ruleID) { 
+    this.lhs          = lhs; 
+    this.pFrench     = sourceRhs; 
+    this.featScores  = featureScores; 
+    this.arity        = arity; 
+    this.latticeCost = latticeCost; 
+    this.ruleID      = ruleID; 
+    this.owner        = owner; 
+  } 
+
+
+  // called by class who does not care about lattice_cost, 
+  // rule_id, and owner 
+  public MonolingualRule(int lhs_, int[] source_rhs, float[] feature_scores, int arity_) { 
+    this.lhs         = lhs_; 
+    this.pFrench    = source_rhs; 
+    this.featScores = feature_scores; 
+    this.arity       = arity_; 
+
+    //==== dummy values 
+    this.latticeCost = 0; 
+    this.ruleID      = DUMMY_RULE_ID; 
+    this.owner        = DUMMY_OWNER; 
+  } 
+
+
+  //=============================================================== 
+  // Attributes 
+  //=============================================================== 
+
+  public final void setRuleID(int id) { this.ruleID = id; } 
+
+  public final int getRuleID() { return this.ruleID; } 
+
+
+  public final void setArity(int arity) { this.arity = arity; } 
+
+  public final int getArity() { return this.arity; } 
+
+
+  public final void setOwner(int owner) { this.owner = owner; } 
+
+  public final int getOwner() { return this.owner; } 
+
+
+  public final void setLHS(int lhs) { this.lhs = lhs; } 
+
+  public final int getLHS() { return this.lhs; } 
+
+
+  public void setEnglish(int[] eng) { 
+    //TODO: do nothing 
+  } 
+
+  public int[] getEnglish() { 
+    //TODO 
+    return null; 
+  } 
+
+
+  public final void setFrench(int[] french) { this.pFrench = french; } 
+
+  public final int[] getFrench() { return this.pFrench; } 
+
+
+  public final void setFeatureScores(float[] scores) { 
+    this.featScores = scores; 
+  } 
+
+  public final float[] getFeatureScores() { 
+    return this.featScores; 
+  } 
+
+
+  public final void setLatticeCost(float cost) { this.latticeCost = cost; } 
+
+  public final float getLatticeCost() { return this.latticeCost; } 
+
+
+  public final float getEstCost() { 
+    if (est_cost <= Double.NEGATIVE_INFINITY) { 
+      logger.warning("The est cost is neg infinity; must be bad rule; rule is:\n" + toString()); 
+    } 
+    return est_cost; 
+  } 
+
+
+  /** 
+   * Set a lower-bound estimate inside the rule returns full 
+   * estimate. 
+   */ 
+  public final float estimateRuleCost(List<FeatureFunction> featureFunctions) { 
+    if (null == featureFunctions) { 
+      return 0; 
+    } else { 
+      float estcost = 0.0f; 
+      for (FeatureFunction ff : featureFunctions) { 
+        double mdcost = - ff.estimateLogP(this, -1) * ff.getWeight(); 
+        estcost += mdcost; 
+      } 
+
+      this.est_cost = estcost; 
+      return estcost; 
+    } 
+  } 
+
+  //=============================================================== 
+  // Methods 
+  //=============================================================== 
+
+  public float incrementFeatureScore(int column, double score) { 
+    synchronized(this) { 
+      featScores[column] += score; 
+      return featScores[column]; 
+    } 
+  } 
+
+
+  public void setFeatureCost(int column, float score) { 
+    synchronized(this) { 
+      featScores[column] = score; 
+    } 
+  } 
+
+
+  public float getFeatureCost(int column) { 
+    synchronized(this) { 
+      return featScores[column]; 
+    } 
+  } 
+
+  //=============================================================== 
+  // Serialization Methods 
+  //=============================================================== 
+  // BUG: These are all far too redundant. Should be refactored to share. 
+
+  // Caching this method significantly improves performance 
+  // We mark it transient because it is, though cf 
+  // java.io.Serializable 
+  private transient String cachedToString = null; 
+
+  @Deprecated 
+  public String toString(Map<Integer,String> ntVocab, SymbolTable sourceVocab, SymbolTable targetVocab) { 
+    if (null == this.cachedToString) { 
+      StringBuffer sb = new StringBuffer(); 
+      sb.append(ntVocab.get(this.lhs)); 
+      sb.append(" ||| "); 
+      sb.append(sourceVocab.getWords(this.pFrench,true)); 
+      sb.append(" |||"); 
+      for (int i = 0; i < this.featScores.length; i++) { 
+        //sb.append(String.format(" %.4f", this.feat_scores[i])); 
+        sb.append(' ').append(Float.toString(this.featScores[i])); 
+      } 
+      this.cachedToString = sb.toString(); 
+    } 
+    return this.cachedToString; 
+  } 
+
+
+  //print the rule in terms of Integers 
+  @Deprecated 
+  public String toString() { 
+    if (null == this.cachedToString) { 
+      StringBuffer sb = new StringBuffer(); 
+      sb.append(this.lhs); 
+      sb.append(" ||| "); 
+      sb.append(Arrays.toString(this.pFrench)); 
+      sb.append(" |||"); 
+      for (int i = 0; i < this.featScores.length; i++) { 
+        sb.append(String.format(" %.4f", this.featScores[i])); 
+      } 
+      this.cachedToString = sb.toString(); 
+    } 
+    return this.cachedToString; 
+  } 
+
+
+  //do not use cachedToString 
+  @Deprecated 
+  public String toString(SymbolTable symbolTable) { 
+    StringBuffer sb = new StringBuffer(); 
+    sb.append(symbolTable.getWord(this.lhs)); 
+    sb.append(" ||| "); 
+    sb.append(symbolTable.getWords(this.pFrench)); 
+    sb.append(" |||"); 
+    for (int i = 0; i < this.featScores.length; i++) { 
+      sb.append(String.format(" %.4f", this.featScores[i])); 
+    } 
+    return sb.toString(); 
+  } 
+
+
+  @Deprecated 
+  public String toStringWithoutFeatScores(SymbolTable symbolTable) { 
+    StringBuffer sb = new StringBuffer(); 
+    if(symbolTable==null) 
+      sb.append(this.getLHS()); 
+    else 
+      sb.append(symbolTable.getWord(this.getLHS())); 
+
+    return sb.append(" ||| ") 
+        .append(convertToString(this.getFrench(), symbolTable)) 
+        .toString(); 
+  } 
+
+  public String convertToString(int[] words, SymbolTable symbolTable){   
+    StringBuffer sb = new StringBuffer(); 
+    for (int i = 0; i < words.length; i++) { 
+      if(symbolTable!=null) 
+        sb.append( symbolTable.getWord(words[i]) ); 
+      else 
+        sb.append(words[i]); 
+
+      if(i<words.length-1) 
+        sb.append(" "); 
+    } 
+    return sb.toString(); 
+  } 
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ab5bb42c/src/main/java/org/apache/joshua/lattice/Lattice.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/lattice/Lattice.java b/src/main/java/org/apache/joshua/lattice/Lattice.java
index 98938d8..1adefa8 100644
--- a/src/main/java/org/apache/joshua/lattice/Lattice.java
+++ b/src/main/java/org/apache/joshua/lattice/Lattice.java
@@ -25,6 +25,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Stack;
+import java.util.logging.Level;
 import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -62,7 +63,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
 
   /** Logger for this class. */
   private static final Logger logger = Logger.getLogger(Lattice.class.getName());
-  
+
   JoshuaConfiguration config = null;
 
   /**
@@ -75,7 +76,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
    */
   public Lattice(List<Node<Value>> nodes, JoshuaConfiguration config) {
     this.nodes = nodes;
-//    this.distances = calculateAllPairsShortestPath();
+    //    this.distances = calculateAllPairsShortestPath();
     this.latticeHasAmbiguity = true;
   }
 
@@ -83,7 +84,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
     // Node<Value> sink = new Node<Value>(nodes.size());
     // nodes.add(sink);
     this.nodes = nodes;
-//    this.distances = calculateAllPairsShortestPath();
+    //    this.distances = calculateAllPairsShortestPath();
     this.latticeHasAmbiguity = isAmbiguous;
   }
 
@@ -114,7 +115,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
       i++;
     }
 
-//    this.distances = calculateAllPairsShortestPath();
+    //    this.distances = calculateAllPairsShortestPath();
   }
 
   public final boolean hasMoreThanOnePath() {
@@ -155,7 +156,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
 
   public static Lattice<Token> createTokenLatticeFromPLF(String data, JoshuaConfiguration config) {
     ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>();
-    
+
     // This matches a sequence of tuples, which describe arcs leaving this node
     Pattern nodePattern = Pattern.compile("(.+?)\\(\\s*(\\(.+?\\),\\s*)\\s*\\)(.*)");
 
@@ -320,7 +321,7 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
     // System.err.println(String.format("DISTANCE(%d,%d) = %f", from, to, costs[from][to]));
     if (distances == null)
       this.distances = calculateAllPairsShortestPath();
-    
+
     return distances.get(from, to);
   }
 
@@ -448,22 +449,22 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
    * @param lattice
    */
   public void insert(int i, int j, List<Node<Value>> newNodes) {
-    
+
     nodes.get(i).setOutgoingArcs(newNodes.get(0).getOutgoingArcs());
-    
+
     newNodes.remove(0);
     nodes.remove(j);
     Collections.reverse(newNodes);
-    
+
     for (Node<Value> node: newNodes)
       nodes.add(j, node);
-  
+
     this.latticeHasAmbiguity = false;
     for (int x = 0; x < nodes.size(); x++) {
       nodes.get(x).setID(x);
       this.latticeHasAmbiguity |= (nodes.get(x).getOutgoingArcs().size() > 1);
     }
-    
+
     this.distances = null;
   }
 
@@ -481,35 +482,104 @@ public class Lattice<Value> implements Iterable<Node<Value>> {
       ArrayList<Arc<Value>> arcs = new ArrayList<Arc<Value>>();
       for (Arc<Value> arc: node.getOutgoingArcs()) {
         arcs.add(arc);
-        
+
         if (! ingraph.containsKey(arc.getHead()))
           ingraph.put(arc.getHead(), new ArrayList<Arc<Value>>());
         ingraph.get(arc.getHead()).add(arc);
-        
+
         outgraph.put(node, arcs);
       }
     }
-    
+
     ArrayList<Node<Value>> sortedNodes = new ArrayList<Node<Value>>();
     Stack<Node<Value>> stack = new Stack<Node<Value>>();
     stack.push(nodes.get(0));
-    
+
     while (! stack.empty()) {
       Node<Value> node = stack.pop();
       sortedNodes.add(node);
       for (Arc<Value> arc: outgraph.get(node)) {
         outgraph.get(node).remove(arc);
         ingraph.get(arc.getHead()).remove(arc);
-        
+
         if (ingraph.get(arc.getHead()).size() == 0)
           sortedNodes.add(arc.getHead());
       }
     }
-    
+
     int id = 0;
     for (Node<Value> node : sortedNodes)
       node.setID(id++);
-    
+
     this.nodes = sortedNodes;
   }
+
+  /**
+   * Constructs a lattice from a given string representation. 
+   * 
+   * @param data String representation of a lattice. 
+   * @return A lattice that corresponds to the given string. 
+   */ 
+  public static Lattice<String> createFromString(String data) { 
+
+    Map<Integer,Node<String>> nodes = new HashMap<Integer,Node<String>>(); 
+
+    Pattern nodePattern = Pattern.compile("(.+?)\\((\\(.+?\\),)\\)(.*)"); 
+    Pattern arcPattern = Pattern.compile("\\('(.+?)',(\\d+.\\d+),(\\d+)\\),(.*)"); 
+
+    Matcher nodeMatcher = nodePattern.matcher(data); 
+
+    int nodeID = -1; 
+
+    while (nodeMatcher.matches()) { 
+
+      String nodeData = nodeMatcher.group(2); 
+      String remainingData = nodeMatcher.group(3); 
+
+      nodeID++; 
+
+      Node<String> currentNode; 
+      if (nodes.containsKey(nodeID)) { 
+        currentNode = nodes.get(nodeID); 
+      } else { 
+        currentNode = new Node<String>(nodeID); 
+        nodes.put(nodeID, currentNode); 
+      } 
+
+      if (logger.isLoggable(Level.FINE)) logger.fine("Node " + nodeID + ":"); 
+
+      Matcher arcMatcher = arcPattern.matcher(nodeData); 
+
+      while (arcMatcher.matches()) { 
+        String arcLabel = arcMatcher.group(1); 
+        double arcWeight = Double.valueOf(arcMatcher.group(2)); 
+        int destinationNodeID = nodeID + Integer.valueOf(arcMatcher.group(3)); 
+
+        Node<String> destinationNode; 
+        if (nodes.containsKey(destinationNodeID)) { 
+          destinationNode = nodes.get(destinationNodeID); 
+        } else { 
+          destinationNode = new Node<String>(destinationNodeID); 
+          nodes.put(destinationNodeID, destinationNode); 
+        } 
+
+        String remainingArcs = arcMatcher.group(4); 
+
+        if (logger.isLoggable(Level.FINE)) logger.fine("\t" + arcLabel + " " + arcWeight + " " + destinationNodeID); 
+
+        currentNode.addArc(destinationNode, (float) arcWeight, arcLabel); 
+
+        arcMatcher = arcPattern.matcher(remainingArcs); 
+      } 
+
+      nodeMatcher = nodePattern.matcher(remainingData); 
+    } 
+
+    List<Node<String>> nodeList = new ArrayList<Node<String>>(nodes.values()); 
+    Collections.sort(nodeList, new NodeIdentifierComparator()); 
+
+    if (logger.isLoggable(Level.FINE)) logger.fine(nodeList.toString()); 
+
+    return new Lattice<String>(nodeList, new JoshuaConfiguration()); 
+  } 
 }