You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/31 19:39:24 UTC
[4/5] incubator-joshua git commit: Merge branch 'sparse' of https://github.com/fhieber/incubator-joshua into JOSHUA-PR21

Merge branch 'sparse' of https://github.com/fhieber/incubator-joshua into JOSHUA-PR21

# Conflicts:
#	lib/ivy.xml
#	src/main/java/org/apache/joshua/decoder/Decoder.java
#	src/main/java/org/apache/joshua/decoder/ff/RuleLength.java
#	src/main/java/org/apache/joshua/decoder/ff/RuleShape.java


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/5c0d5388
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/5c0d5388
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/5c0d5388

Branch: refs/heads/JOSHUA-252
Commit: 5c0d5388ae7a76538337bf89bd6ac9a04d2c6dff
Parents: 9e70266 5591c67
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue May 31 15:39:04 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue May 31 15:39:04 2016 -0400

----------------------------------------------------------------------
 lib/ivy.xml                                     |  17 +++
 src/joshua/decoder/ff/LexicalFeatures.java      | 131 +++++++++++++++++++
 .../org/apache/joshua/corpus/Vocabulary.java    |  13 +-
 .../java/org/apache/joshua/decoder/Decoder.java |  17 ++-
 .../joshua/decoder/JoshuaConfiguration.java     |  10 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |  15 ++-
 .../org/apache/joshua/decoder/ff/RuleFF.java    | 109 +++++++++------
 .../apache/joshua/decoder/ff/RuleLength.java    |  13 +-
 .../org/apache/joshua/decoder/ff/RuleShape.java |  67 +++++++---
 .../apache/joshua/decoder/ff/WordPenalty.java   |  10 +-
 .../lm/berkeley_lm/LMGrammarBerkeleyTest.java   |   2 +-
 .../system/MultithreadedTranslationTests.java   |   2 +-
 .../system/StructuredTranslationTest.java       |   2 +-
 13 files changed, 314 insertions(+), 94 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5c0d5388/src/main/java/org/apache/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/corpus/Vocabulary.java
index 8416e4a,0000000..f1bf53d
mode 100644,000000..100644
--- a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
+++ b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
@@@ -1,295 -1,0 +1,302 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.corpus;
 +
 +import java.io.BufferedInputStream;
 +import java.io.BufferedOutputStream;
 +import java.io.DataInputStream;
 +import java.io.DataOutputStream;
 +import java.io.Externalizable;
 +import java.io.File;
 +import java.io.FileInputStream;
 +import java.io.FileOutputStream;
 +import java.io.IOException;
 +import java.io.ObjectInput;
 +import java.io.ObjectOutput;
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.concurrent.locks.StampedLock;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.ff.lm.NGramLanguageModel;
 +import org.apache.joshua.util.FormatUtils;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * Static singular vocabulary class.
 + * Supports (de-)serialization into a vocabulary file.
 + *
 + * @author Juri Ganitkevitch
 + */
 +
 +public class Vocabulary implements Externalizable {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(Vocabulary.class);
 +  private final static ArrayList<NGramLanguageModel> LMs = new ArrayList<>();
 +
 +  private static List<String> idToString;
 +  private static Map<String, Integer> stringToId;
 +  private static final StampedLock lock = new StampedLock();
 +
 +  static final int UNKNOWN_ID = 0;
 +  static final String UNKNOWN_WORD = "<unk>";
 +
 +  public static final String START_SYM = "<s>";
 +  public static final String STOP_SYM = "</s>";
 +
 +  static {
 +    clear();
 +  }
 +
 +  /**
 +   * Registers a language model with the vocabulary and notifies it of every word
 +   * that is already present (all ids above the unknown-word id 0).
 +   *
 +   * @param lm the language model to register
 +   * @return true if at least one call to {@code lm.registerWord} returned true
 +   *         (presumably signalling a collision -- confirm against NGramLanguageModel)
 +   */
 +  public static boolean registerLanguageModel(NGramLanguageModel lm) {
 +    long lock_stamp = lock.writeLock();
 +    try {
 +      // Store the language model.
 +      LMs.add(lm);
 +      // Notify it of all the existing words. registerWord() must be the left operand:
 +      // written as "collision || registerWord(...)" the short-circuit would stop
 +      // registering words as soon as the first collision had been seen.
 +      boolean collision = false;
 +      for (int i = idToString.size() - 1; i > 0; i--) {
 +        collision = lm.registerWord(idToString.get(i), i) || collision;
 +      }
 +      return collision;
 +    } finally {
 +      lock.unlockWrite(lock_stamp);
 +    }
 +  }
 +
 +  /**
 +   * Reads a vocabulary from file. This deletes any additions to the vocabulary made prior to
 +   * reading the file.
 +   *
 +   * @param vocab_file path to a vocabulary file
 +   * @return Returns true if vocabulary was read without mismatches or collisions.
 +   * @throws IOException if the file cannot be found or read properly
 +   */
 +  public static boolean read(final File vocab_file) throws IOException {
 +    // try-with-resources closes the stream on every exit path, including exceptions
 +    // thrown by readInt()/readUTF().
 +    try (DataInputStream vocab_stream =
 +        new DataInputStream(new BufferedInputStream(new FileInputStream(vocab_file)))) {
 +      int size = vocab_stream.readInt();
 +      LOG.info("Read {} entries from the vocabulary", size);
 +      clear();
 +      for (int i = 0; i < size; i++) {
 +        int id = vocab_stream.readInt();
 +        String token = vocab_stream.readUTF();
 +        // id(token) hands out the next free id (negated for nonterminals); it has to
 +        // agree with the id stored in the file.
 +        if (id != Math.abs(id(token))) {
 +          return false;
 +        }
 +      }
 +      // One extra entry accounts for the unknown word that clear() installs at id 0.
 +      return (size + 1 == idToString.size());
 +    }
 +  }
 +
 +  public static void write(String file_name) throws IOException {
 +    long lock_stamp =lock.readLock();
 +    try {
 +      File vocab_file = new File(file_name);
 +      DataOutputStream vocab_stream =
 +          new DataOutputStream(new BufferedOutputStream(new FileOutputStream(vocab_file)));
 +      vocab_stream.writeInt(idToString.size() - 1);
 +      LOG.info("Writing vocabulary: {} tokens", idToString.size() - 1);
 +      for (int i = 1; i < idToString.size(); i++) {
 +        vocab_stream.writeInt(i);
 +        vocab_stream.writeUTF(idToString.get(i));
 +      }
 +      vocab_stream.close();
 +    }
 +    finally{
 +      lock.unlockRead(lock_stamp);
 +    }
 +  }
 +
 +  /**
 +   * Get the id of the token if it already exists, new id is created otherwise.
 +   *
 +   * The lookup is tried three times with increasing strength: an optimistic
 +   * (lock-free) read, a read under the read lock, and finally a re-check plus
 +   * insertion under the write lock. New ids are the current size of idToString,
 +   * negated when FormatUtils.isNonterminal(token) is true, and every registered
 +   * language model is told about the new (token, |id|) pair.
 +   *
 +   * TODO: currently locks for every call. Separate constant (frozen) ids from
 +   * changing (e.g. OOV) ids. Constant ids could be immutable -&gt; no locking.
 +   * Alternatively: could we use ConcurrentHashMap to not have to lock if
 +   * actually contains it and only lock for modifications?
 +   *
 +   * NOTE(review): the optimistic read touches the plain HashMap without holding
 +   * the lock. HashMap is documented as unsafe under concurrent modification, so a
 +   * concurrent writer could in principle make containsKey/get misbehave before
 +   * validate() is reached -- confirm this is acceptable.
 +   *
 +   * @param token a token to obtain an id for
 +   * @return the token id
 +   */
 +  public static int id(String token) {
 +    // First attempt an optimistic read
 +    long attempt_read_lock = lock.tryOptimisticRead();
 +    if (stringToId.containsKey(token)) {
 +      int resultId = stringToId.get(token);
 +      if (lock.validate(attempt_read_lock)) {
 +        return resultId;
 +      }
 +    }
 +
 +    // The optimistic read failed, try a read with a stamped read lock
 +    long read_lock_stamp = lock.readLock();
 +    try {
 +      if (stringToId.containsKey(token)) {
 +        return stringToId.get(token);
 +      }
 +    } finally {
 +      lock.unlockRead(read_lock_stamp);
 +    }
 +
 +    // Looks like the id we want is not there, let's get a write lock and add it
 +    long write_lock_stamp = lock.writeLock();
 +    try {
 +      if (stringToId.containsKey(token)) { // another thread may have added it between the two locks
 +        return stringToId.get(token);
 +      }
 +      int id = idToString.size() * (FormatUtils.isNonterminal(token) ? -1 : 1);
 +
 +      // register this (token,id) mapping with each language
 +      // model, so that they can map it to their own private
 +      // vocabularies
 +      for (NGramLanguageModel lm : LMs)
 +        lm.registerWord(token, Math.abs(id));
 +
 +      idToString.add(token);
 +      stringToId.put(token, id);
 +      return id;
 +    } finally {
 +      lock.unlockWrite(write_lock_stamp);
 +    }
 +  }
 +
 +  public static boolean hasId(int id) {
 +    long lock_stamp = lock.readLock();
 +    try {
 +      id = Math.abs(id);
 +      return (id < idToString.size());
 +    }
 +    finally{
 +      lock.unlockRead(lock_stamp);
 +    }
 +  }
 +
 +  /**
 +   * Splits the sentence on runs of whitespace and maps every resulting token to its id.
 +   *
 +   * @param sentence whitespace-separated tokens
 +   * @return the ids of the tokens, in order
 +   */
 +  public static int[] addAll(String sentence) {
 +    final String[] tokens = sentence.split("\\s+");
 +    return addAll(tokens);
 +  }
 +
 +  /**
 +   * Maps every token to its id via {@link #id(String)}, creating ids for unseen tokens.
 +   *
 +   * @param tokens the tokens to look up
 +   * @return an array of the same length holding the id of each token
 +   */
 +  public static int[] addAll(String[] tokens) {
 +    final int[] result = new int[tokens.length];
 +    int position = 0;
 +    for (String token : tokens) {
 +      result[position++] = id(token);
 +    }
 +    return result;
 +  }
 +
 +  public static String word(int id) {
 +    long lock_stamp = lock.readLock();
 +    try {
 +      id = Math.abs(id);
 +      return idToString.get(id);
 +    }
 +    finally{
 +      lock.unlockRead(lock_stamp);
 +    }
 +  }
 +
 +  public static String getWords(int[] ids) { // joins the words for ids using a single space
-     if (ids.length == 0) return "";
++    return getWords(ids, " "); // delegates to the separator-taking overload
++  }
++  
++  public static String getWords(int[] ids, final String separator) { // joins word(id) for each id, separator between words
++    if (ids.length == 0) {
++      return "";
++    }
 +    StringBuilder sb = new StringBuilder();
-     for (int i = 0; i < ids.length - 1; i++)
-       sb.append(word(ids[i])).append(" ");
++    for (int i = 0; i < ids.length - 1; i++) { // all but the last word are followed by the separator
++      sb.append(word(ids[i])).append(separator);
++    }
 +    return sb.append(word(ids[ids.length - 1])).toString();
 +  }
 +
 +  /**
 +   * Joins the words for the given ids with single spaces.
 +   *
 +   * @param ids the ids to look up; may be empty
 +   * @return the space-separated words, or the empty string when there are no ids
 +   */
 +  public static String getWords(final Iterable<Integer> ids) {
 +    StringBuilder sb = new StringBuilder();
 +    for (int id : ids) {
 +      sb.append(word(id)).append(" ");
 +    }
 +    // Nothing was appended for an empty input, so there is no trailing space to drop;
 +    // deleteCharAt(-1) would throw StringIndexOutOfBoundsException here.
 +    if (sb.length() == 0) {
 +      return "";
 +    }
 +    return sb.deleteCharAt(sb.length() - 1).toString();
 +  }
 +
 +  public static int getUnknownId() {
 +    return UNKNOWN_ID; // id reserved for the unknown-word token (0)
 +  }
 +
 +  public static String getUnknownWord() {
 +    return UNKNOWN_WORD; // surface form of the unknown-word token ("<unk>")
 +  }
 +
 +  /**
 +   * Returns the number of entries in the vocabulary, including the unknown word.
 +   *
 +   * @return the current size of the id-to-token table
 +   */
 +  public static int size() {
 +    final long stamp = lock.readLock();
 +    final int count;
 +    try {
 +      count = idToString.size();
 +    } finally {
 +      lock.unlockRead(stamp);
 +    }
 +    return count;
 +  }
 +
 +  public static synchronized int getTargetNonterminalIndex(int id) { // NOTE(review): synchronized uses the class monitor, not the StampedLock used elsewhere in this class
 +    return FormatUtils.getNonterminalIndex(word(id)); // look up the token, then let FormatUtils extract its index
 +  }
 +
 +  /**
 +   * Resets the vocabulary to its initial state: fresh id/token tables that contain
 +   * only the unknown word. Language models that were registered stay registered.
 +   */
 +  public static void clear() {
 +    final long stamp = lock.writeLock();
 +    try {
 +      idToString = new ArrayList<>();
 +      stringToId = new HashMap<>();
 +
 +      // Seed both directions of the mapping with the unknown word.
 +      stringToId.put(UNKNOWN_WORD, UNKNOWN_ID);
 +      idToString.add(UNKNOWN_ID, UNKNOWN_WORD);
 +    } finally {
 +      lock.unlockWrite(stamp);
 +    }
 +  }
 +
 +  public static void unregisterLanguageModels() {
 +    LMs.clear();
 +  }
 +
 +  @Override
 +  public void writeExternal(ObjectOutput out) throws IOException {
 +    // TODO Auto-generated method stub -- currently a no-op: nothing is written to 'out'.
 +
 +  }
 +
 +  @Override
 +  public void readExternal(ObjectInput in)
 +      throws IOException, ClassNotFoundException {
 +    // TODO Auto-generated method stub -- currently a no-op: nothing is read from 'in'.
 +
 +  }
 +
 +  /**
 +   * All state of this class is static, so any two instances of exactly the same class
 +   * are considered equal.
 +   *
 +   * @param o the object to compare with; may be null
 +   * @return true if o is non-null and has exactly this object's class
 +   */
 +  @Override
 +  public boolean equals(Object o) {
 +    // The null check keeps equals(null) from throwing, as the Object.equals contract requires.
 +    return o != null && getClass() == o.getClass();
 +  }
 +
 +  /**
 +   * Hash code consistent with {@link #equals(Object)}: equal instances share a class,
 +   * hence share this value.
 +   */
 +  @Override
 +  public int hashCode() {
 +    return getClass().hashCode();
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5c0d5388/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/Decoder.java
index 8535b11,0000000..6fa5eb8
mode 100644,000000..100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@@ -1,975 -1,0 +1,974 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import static org.apache.joshua.decoder.ff.FeatureVector.DENSE_FEATURE_NAMES;
 +
 +import java.io.BufferedWriter;
 +import java.io.File;
 +import java.io.IOException;
 +import java.io.OutputStream;
 +import java.io.FileNotFoundException;
 +import java.io.FileWriter;
 +import java.lang.reflect.Constructor;
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.concurrent.ArrayBlockingQueue;
 +import java.util.concurrent.BlockingQueue;
 +
 +import com.google.common.base.Strings;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.JoshuaConfiguration.INPUT_TYPE;
 +import org.apache.joshua.decoder.JoshuaConfiguration.SERVER_TYPE;
 +import org.apache.joshua.decoder.ff.FeatureFunction;
 +import org.apache.joshua.decoder.ff.PhraseModel;
 +import org.apache.joshua.decoder.ff.StatefulFF;
 +import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
 +import org.apache.joshua.decoder.ff.tm.Grammar;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.Trie;
 +import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
 +import org.apache.joshua.decoder.ff.tm.hash_based.MemoryBasedBatchGrammar;
 +import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
 +import org.apache.joshua.decoder.io.JSONMessage;
 +import org.apache.joshua.decoder.io.TranslationRequestStream;
 +import org.apache.joshua.decoder.phrase.PhraseTable;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.FileUtility;
 +import org.apache.joshua.util.FormatUtils;
 +import org.apache.joshua.util.Regex;
 +import org.apache.joshua.util.io.LineReader;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * This class handles decoder initialization and the complication introduced by multithreading.
 + *
 + * After initialization, the main entry point to the Decoder object is
 + * decodeAll(TranslationRequest), which returns a set of Translation objects wrapped in an iterable
 + * Translations object. It is important that we support multithreading both (a) across the sentences
 + * within a request and (b) across requests, in a round-robin fashion. This is done by maintaining a
 + * fixed sized concurrent thread pool. When a new request comes in, a RequestParallelizer thread is
 + * launched. This object iterates over the request's sentences, obtaining a thread from the
 + * thread pool, and using that thread to decode the sentence. If no decoding thread is available,
 + * it blocks until one becomes free, in a fair (FIFO) manner. RequestParallelizer thereby permits intra-request
 + * parallelization by separating out reading the input stream from processing the translated sentences,
 + * but also ensures that round-robin parallelization occurs, since RequestParallelizer uses the
 + * thread pool before translating each request.
 + *
 + * A decoding thread is handled by DecoderThread and launched from DecoderThreadRunner. The purpose
 + * of the runner is to record where to place the translated sentence when it is done (i.e., which
 + * Translations object). Translations itself is an iterator whose next() call blocks until the next
 + * translation is available.
 + *
 + * @author Matt Post post@cs.jhu.edu
 + * @author Zhifei Li, zhifei.work@gmail.com
 + * @author wren ng thornton wren@users.sourceforge.net
 + * @author Lane Schwartz dowobeha@users.sourceforge.net
 + */
 +public class Decoder {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(Decoder.class);
 +
 +  private final JoshuaConfiguration joshuaConfiguration;
 +
 +  public JoshuaConfiguration getJoshuaConfiguration() {
 +    return joshuaConfiguration; // the configuration object this decoder was constructed with
 +  }
 +
 +  /*
 +   * Many of these objects themselves are global objects. We pass them in when constructing other
 +   * objects, so that they all share pointers to the same object. This is good because it reduces
 +   * overhead, but it can be problematic because of unseen dependencies (for example, in the
 +   * Vocabulary shared by language model, translation grammar, etc).
 +   */
 +  private List<Grammar> grammars;
 +  private ArrayList<FeatureFunction> featureFunctions;
 +  private PhraseTable customPhraseTable;
 +
 +  /* The feature weights. */
 +  public static FeatureVector weights;
 +
 +  public static int VERBOSE = 1;
 +
 +  private BlockingQueue<DecoderThread> threadPool = null;
 +
 +  // ===============================================================
 +  // Constructors
 +  // ===============================================================
 +
 +  /**
 +   * Constructor method that creates a new decoder using the specified configuration file.
 +   * It first sets up the empty decoder state via the single-argument constructor and then
 +   * calls initialize(configFile).
 +   *
 +   * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
 +   * @param configFile name of configuration file.
 +   */
 +  public Decoder(JoshuaConfiguration joshuaConfiguration, String configFile) {
 +    this(joshuaConfiguration);
 +    this.initialize(configFile);
 +  }
 +
 +  /**
 +   * Builds a decoder from a configuration file, starting from a freshly created
 +   * {@link org.apache.joshua.decoder.JoshuaConfiguration}.
 +   *
 +   * @param configFile Name of configuration file.
 +   * @return a configured {@link org.apache.joshua.decoder.Decoder}
 +   */
 +  public static Decoder createDecoder(String configFile) {
 +    return new Decoder(new JoshuaConfiguration(), configFile);
 +  }
 +
 +  /**
 +   * Constructs an uninitialized decoder: stores the configuration, creates an empty grammar
 +   * list and a fair thread-pool queue sized by num_parallel_decoders, and leaves the custom
 +   * phrase table unset.
 +   * <p>
 +   * This constructor is private; it is called by the public two-argument constructor and by
 +   * {@link #getUninitalizedDecoder(JoshuaConfiguration)}, which provides an uninitialized
 +   * decoder for use in testing.
 +   */
 +  private Decoder(JoshuaConfiguration joshuaConfiguration) {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    this.grammars = new ArrayList<Grammar>();
 +    this.threadPool = new ArrayBlockingQueue<DecoderThread>(
 +        this.joshuaConfiguration.num_parallel_decoders, true);
 +    this.customPhraseTable = null;
 +  }
 +
 +  /**
 +   * Gets an uninitialized decoder for use in testing.
 +   * <p>
 +   * This method is called by unit tests or any outside packages (e.g., MERT) relying on the
 +   * decoder. No configuration file is read; only the private constructor is run.
 +   * @param joshuaConfiguration a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
 +   * @return an uninitialized decoder for use in testing
 +   */
 +  static public Decoder getUninitalizedDecoder(JoshuaConfiguration joshuaConfiguration) {
 +    return new Decoder(joshuaConfiguration);
 +  }
 +
 +  // ===============================================================
 +  // Public Methods
 +  // ===============================================================
 +
 +  /**
 +   * This class is responsible for getting sentences from the TranslationRequest and procuring a
 +   * DecoderThreadRunner to translate it. Each call to decodeAll(TranslationRequest) launches a
 +   * thread that will read the request's sentences, obtain a DecoderThread to translate them, and
 +   * then place the Translation in the appropriate place.
 +   *
 +   * @author Matt Post <po...@cs.jhu.edu>
 +   *
 +   */
 +  private class RequestParallelizer extends Thread {
 +    /* Source of sentences to translate. */
 +    private final TranslationRequestStream request;
 +
 +    /* Where to put translated sentences. */
 +    private final Translations response;
 +
 +    /* Sometimes we need to communicate with the client even when we didn't get a new sentence
 +     * (e.g., metadata)
 +     */
 +    private OutputStream out;
 +
 +    RequestParallelizer(TranslationRequestStream request, Translations response, OutputStream out) {
 +      this.request = request; // source of sentences
 +      this.response = response; // sink for translations
 +      this.out = out; // used by handleMetadata to reply to the client
 +    }
 +
 +    /**
 +     * Reads sentences from the request until it is exhausted, handing each one to a
 +     * DecoderThreadRunner together with a DecoderThread taken from the pool. Metadata
 +     * found on the input is processed by handleMetadata and does not produce a sentence.
 +     */
 +    @Override
 +    public void run() {
 +      /*
 +       * Repeatedly get an input sentence, wait for a DecoderThread, and then start a new thread to
 +       * translate the sentence. We start a new thread (via DecoderThreadRunner) as opposed to
 +       * blocking, so that the RequestHandler can go on to the next sentence in this request, which
 +       * allows parallelization across the sentences of the request.
 +       */
 +      for (;;) {
 +        Sentence sentence = null;
 +        try {
 +          sentence = request.next();
 +
 +        } catch (MetaDataException meta) {
 +          try {
 +            handleMetadata(meta);
 +          } catch (IOException e) {
 +            // Report through the logger rather than printing to stderr.
 +            LOG.error("Failed to process metadata '{}'", meta, e);
 +          }
 +
 +          continue;
 +        }
 +
 +        if (sentence == null) {
 +          response.finish();
 +          break;
 +        }
 +
 +        // This will block until a DecoderThread becomes available.
 +        DecoderThread thread = Decoder.this.getThread();
 +        if (thread == null) {
 +          // getThread() returns null when the wait was interrupted; without a thread the
 +          // sentence cannot be decoded, so stop reading and release the consumer.
 +          LOG.error("Input {}: no decoder thread available, aborting request", sentence.id());
 +          response.finish();
 +          break;
 +        }
 +        new DecoderThreadRunner(thread, sentence, response).start();
 +      }
 +    }
 +
 +    /**
 +     * When metadata is found on the input, it needs to be processed. That is done here. Sometimes
 +     * this involves returning data to the client.
 +     *
 +     * Recognized types are set_weight, get_weight, add_rule, list_rules and remove_rule; any
 +     * other type is silently ignored.
 +     *
 +     * @param meta the metadata command read from the input
 +     * @throws IOException if writing the reply to the client fails
 +     */
 +    private void handleMetadata(MetaDataException meta) throws IOException {
 +      if (meta.type().equals("set_weight")) {
 +        // Change a decoder weight
 +        String[] tokens = meta.tokens();
 +        if (tokens.length != 3) {
 +          LOG.error("weight change requires three tokens");
 +        } else {
 +          float old_weight = Decoder.weights.getWeight(tokens[1]);
 +          Decoder.weights.set(tokens[1], Float.parseFloat(tokens[2]));
 +          LOG.error("@set_weight: {} {} -> {}", tokens[1], old_weight,
 +              Decoder.weights.getWeight(tokens[1]));
 +        }
 +
 +        // TODO: return a JSON object with this weight or all weights
 +        out.write("".getBytes());
 +
 +      } else if (meta.type().equals("get_weight")) {
 +        // TODO: add to JSON object, send back
 +
 +        String[] tokens = meta.tokens();
 +
 +        // tokens[1] is the weight name; guard against a request that omits it.
 +        if (tokens.length < 2) {
 +          LOG.error("weight lookup requires two tokens");
 +        } else {
 +          LOG.error("{} = {}", tokens[1], Decoder.weights.getWeight(tokens[1]));
 +        }
 +
 +        out.write("".getBytes());
 +
 +      } else if (meta.type().equals("add_rule")) {
 +        String tokens[] = meta.tokens(" \\|\\|\\| ");
 +
 +        if (tokens.length != 2) {
 +          LOG.error("* INVALID RULE '{}'", meta);
 +          out.write("bad rule".getBytes());
 +          return;
 +        }
 +
 +        Rule rule = new HieroFormatReader().parseLine(
 +            String.format("[X] ||| [X,1] %s ||| [X,1] %s ||| custom=1", tokens[0], tokens[1]));
 +        Decoder.this.customPhraseTable.addRule(rule);
 +        rule.estimateRuleCost(featureFunctions);
 +        LOG.info("Added custom rule {}", formatRule(rule));
 +
 +        String response = String.format("Added rule %s", formatRule(rule));
 +        out.write(response.getBytes());
 +
 +      } else if (meta.type().equals("list_rules")) {
 +
 +        JSONMessage message = new JSONMessage();
 +
 +        // Walk the grammar trie
 +        ArrayList<Trie> nodes = new ArrayList<Trie>();
 +        nodes.add(customPhraseTable.getTrieRoot());
 +
 +        while (nodes.size() > 0) {
 +          Trie trie = nodes.remove(0);
 +
 +          if (trie == null)
 +            continue;
 +
 +          if (trie.hasRules()) {
 +            for (Rule rule: trie.getRuleCollection().getRules()) {
 +              message.addRule(formatRule(rule));
 +            }
 +          }
 +
 +          if (trie.getExtensions() != null)
 +            nodes.addAll(trie.getExtensions());
 +        }
 +
 +        out.write(message.toString().getBytes());
 +
 +      } else if (meta.type().equals("remove_rule")) {
 +        // Remove a rule from a custom grammar, if present
 +        String[] tokens = meta.tokenString().split(" \\|\\|\\| ");
 +        if (tokens.length != 2) {
 +          out.write(String.format("Invalid delete request: '%s'", meta.tokenString()).getBytes());
 +          return;
 +        }
 +
 +        // Search for the rule in the trie
 +        int nt_i = Vocabulary.id(joshuaConfiguration.default_non_terminal);
 +        Trie trie = customPhraseTable.getTrieRoot().match(nt_i);
 +
 +        // match() can return null (the loop below already checks for that), so the
 +        // initial lookup needs the same guard before it is dereferenced.
 +        if (trie != null) {
 +          for (String word: tokens[0].split("\\s+")) {
 +            int id = Vocabulary.id(word);
 +            Trie nextTrie = trie.match(id);
 +            if (nextTrie != null)
 +              trie = nextTrie;
 +          }
 +        }
 +
 +        if (trie != null && trie.hasRules()) {
 +          Rule matched = null;
 +          for (Rule rule: trie.getRuleCollection().getRules()) {
 +            // Drop the leading nonterminal before comparing with the requested target side.
 +            String target = rule.getEnglishWords();
 +            target = target.substring(target.indexOf(' ') + 1);
 +
 +            if (tokens[1].equals(target)) {
 +              matched = rule;
 +              break;
 +            }
 +          }
 +
 +          // Only remove and report when a rule really matched; otherwise fall through to
 +          // the "No such rule" reply instead of handing null to formatRule().
 +          if (matched != null) {
 +            trie.getRuleCollection().getRules().remove(matched);
 +            out.write(String.format("Removed rule %s", formatRule(matched)).getBytes());
 +            return;
 +          }
 +        }
 +
 +        out.write(String.format("No such rule %s", meta.tokenString()).getBytes());
 +      }
 +    }
 +
 +    /**
 +     * Renders a rule as "source ||| target", skipping the first symbol on each side
 +     * (the leading nonterminal).
 +     *
 +     * @param rule the rule to render
 +     * @return the rule's source and target words separated by " ||| "
 +     */
 +    private String formatRule(Rule rule) {
 +      StringBuilder rendered = new StringBuilder();
 +
 +      // Source side: every symbol after the first, each preceded by a space.
 +      int position = 0;
 +      for (int symbol : rule.getFrench()) {
 +        if (position++ > 0) {
 +          rendered.append(" ").append(Vocabulary.word(symbol));
 +        }
 +      }
 +
 +      // The space after the delimiter comes with the first target word.
 +      rendered.append(" |||");
 +
 +      // Target side, handled the same way.
 +      position = 0;
 +      for (int symbol : rule.getEnglish()) {
 +        if (position++ > 0) {
 +          rendered.append(" ").append(Vocabulary.word(symbol));
 +        }
 +      }
 +
 +      // Drop the leading space.
 +      return rendered.substring(1);
 +    }
 +
 +  /**
 +   * Retrieve a thread from the thread pool, blocking until one is available. The blocking occurs in
 +   * a fair fashion (i.e,. FIFO across requests).
 +   *
 +   * @return a thread that can be used for decoding.
 +   */
 +  public DecoderThread getThread() {
 +    try {
 +      return threadPool.take();
 +    } catch (InterruptedException e) {
 +      // TODO Auto-generated catch block
 +      e.printStackTrace();
 +    }
 +    return null;
 +  }
 +
 +  /**
 +   * This class handles running a DecoderThread (which takes care of the actual translation of an
 +   * input Sentence, returning a Translation object when it's done). This is done in a thread so as
 +   * not to tie up the RequestHandler that launched it, freeing it to go on to the next sentence in
 +   * the TranslationRequest, in turn permitting parallelization across the sentences of a request.
 +   *
 +   * When the decoder thread is finished, the Translation object is placed in the correct place in
 +   * the corresponding Translations object that was returned to the caller of
 +   * Decoder.decodeAll(TranslationRequest).
 +   *
 +   * @author Matt Post <po...@cs.jhu.edu>
 +   */
 +  private class DecoderThreadRunner extends Thread {
 +
 +    private final DecoderThread decoderThread;
 +    private final Sentence sentence;
 +    private final Translations translations;
 +
 +    DecoderThreadRunner(DecoderThread thread, Sentence sentence, Translations translations) {
 +      this.decoderThread = thread;
 +      this.sentence = sentence;
 +      this.translations = translations;
 +    }
 +
 +    @Override
 +    public void run() {
 +      /*
 +       * Use the thread to translate the sentence. Then record the translation with the
 +       * corresponding Translations object, and return the thread to the pool.
 +       *
 +       * NOTE(review): if translate() or record() throws, the put() below is skipped, so
 +       * the DecoderThread is not returned to the pool and no Translation is recorded for
 +       * this sentence -- confirm whether that is intended for "fatal" failures.
 +       */
 +      try {
 +        Translation translation = decoderThread.translate(this.sentence);
 +        translations.record(translation);
 +
 +        /*
 +         * This is crucial! It's what makes the thread available for the next sentence to be
 +         * translated.
 +         */
 +        threadPool.put(decoderThread);
 +      } catch (Exception e) {
 +        throw new RuntimeException(String.format(
 +            "Input %d: FATAL UNCAUGHT EXCEPTION: %s", sentence.id(), e.getMessage()), e);
 +        //        translations.record(new Translation(sentence, null, featureFunctions, joshuaConfiguration));
 +      }
 +    }
 +  }
 +
 +  /**
 +   * This function is the main entry point into the decoder. It translates all the sentences in a
 +   * (possibly boundless) set of input sentences. Each request launches its own thread to read the
 +   * sentences of the request.
 +   *
 +   * Translations are written to {@code out} as they become available; when an n-best file is
 +   * configured and Moses mode is on, the full formatted output also goes to that file.
 +   *
 +   * @param request the populated {@link org.apache.joshua.decoder.io.TranslationRequestStream}
 +   * @param out an appropriate {@link java.io.OutputStream} to write results to
 +   * @throws IOException if there is an error with the input stream or writing the output
 +   */
 +  public void decodeAll(TranslationRequestStream request, OutputStream out) throws IOException {
 +    Translations translations = new Translations(request);
 +
 +    /* Start a thread to handle requests on the input stream */
 +    new RequestParallelizer(request, translations, out).start();
 +
 +    // Create the n-best output stream
 +    FileWriter nbest_out = null;
 +    if (joshuaConfiguration.n_best_file != null)
 +      nbest_out = new FileWriter(joshuaConfiguration.n_best_file);
 +
 +    try {
 +      for (;;) {
 +        Translation translation = translations.next();
 +        if (translation == null)
 +          break;
 +
 +        if (joshuaConfiguration.input_type == INPUT_TYPE.json || joshuaConfiguration.server_type == SERVER_TYPE.HTTP) {
 +          JSONMessage message = JSONMessage.buildMessage(translation);
 +          out.write(message.toString().getBytes());
 +
 +        } else {
 +          /**
 +           * We need to munge the feature value outputs in order to be compatible with Moses tuners.
 +           * Whereas Joshua writes to STDOUT whatever is specified in the `output-format` parameter,
 +           * Moses expects the simple translation on STDOUT and the n-best list in a file with a fixed
 +           * format.
 +           */
 +          String text;
 +          if (joshuaConfiguration.moses) {
 +            text = translation.toString().replaceAll("=", "= ");
 +            // Write the complete formatted string to the n-best file
 +            if (nbest_out != null)
 +              nbest_out.write(text);
 +
 +            // Extract just the translation and output that to STDOUT
 +            text = text.substring(0,  text.indexOf('\n'));
 +            String[] fields = text.split(" \\|\\|\\| ");
 +            text = fields[1] + "\n";
 +
 +          } else {
 +            text = translation.toString();
 +          }
 +
 +          out.write(text.getBytes());
 +        }
 +        out.flush();
 +      }
 +    } finally {
 +      // Close the n-best file even when writing a translation fails part-way through,
 +      // so the file handle is not leaked.
 +      if (nbest_out != null)
 +        nbest_out.close();
 +    }
 +  }
 +
 +
 +  /**
 +   * We can also just decode a single sentence.
 +   *
 +   * @param sentence {@link org.apache.joshua.lattice.Lattice} input
 +   * @return the sentence {@link org.apache.joshua.decoder.Translation}
 +   */
 +  public Translation decode(Sentence sentence) {
 +    // Get a thread.
 +
 +    try {
 +      DecoderThread thread = threadPool.take();
 +      Translation translation = thread.translate(sentence);
 +      threadPool.put(thread);
 +
 +      return translation;
 +
 +    } catch (InterruptedException e) {
 +      e.printStackTrace();
 +    }
 +
 +    return null;
 +  }
 +
 +  /**
 +   * Clean shutdown of Decoder, resetting all
 +   * static variables, such that any other instance of Decoder
 +   * afterwards gets a fresh start.
 +   */
 +  public void cleanUp() {
 +    // shut down DecoderThreads
 +    for (DecoderThread thread : threadPool) {
 +      try {
 +        thread.join();
 +      } catch (InterruptedException e) {
 +        e.printStackTrace();
 +      }
 +    }
 +    resetGlobalState();
 +  }
 +
 +  public static void resetGlobalState() {
 +    // clear/reset static variables
 +    DENSE_FEATURE_NAMES.clear();
 +    Vocabulary.clear();
 +    Vocabulary.unregisterLanguageModels();
 +    LanguageModelFF.resetLmIndex();
 +    StatefulFF.resetGlobalStateIndex();
 +  }
 +
 +  public static void writeConfigFile(double[] newWeights, String template, String outputFile,
 +      String newDiscriminativeModel) {
 +    try {
 +      int columnID = 0;
 +
 +      BufferedWriter writer = FileUtility.getWriteFileStream(outputFile);
 +      LineReader reader = new LineReader(template);
 +      try {
 +        for (String line : reader) {
 +          line = line.trim();
 +          if (Regex.commentOrEmptyLine.matches(line) || line.indexOf("=") != -1) {
 +            // comment, empty line, or parameter lines: just copy
 +            writer.write(line);
 +            writer.newLine();
 +
 +          } else { // models: replace the weight
 +            String[] fds = Regex.spaces.split(line);
 +            StringBuffer newSent = new StringBuffer();
 +            if (!Regex.floatingNumber.matches(fds[fds.length - 1])) {
 +              throw new IllegalArgumentException("last field is not a number; the field is: "
 +                  + fds[fds.length - 1]);
 +            }
 +
 +            if (newDiscriminativeModel != null && "discriminative".equals(fds[0])) {
 +              newSent.append(fds[0]).append(' ');
 +              newSent.append(newDiscriminativeModel).append(' ');// change the
 +              // file name
 +              for (int i = 2; i < fds.length - 1; i++) {
 +                newSent.append(fds[i]).append(' ');
 +              }
 +            } else {// regular
 +              for (int i = 0; i < fds.length - 1; i++) {
 +                newSent.append(fds[i]).append(' ');
 +              }
 +            }
 +            if (newWeights != null)
 +              newSent.append(newWeights[columnID++]);// change the weight
 +            else
 +              newSent.append(fds[fds.length - 1]);// do not change
 +
 +            writer.write(newSent.toString());
 +            writer.newLine();
 +          }
 +        }
 +      } finally {
 +        reader.close();
 +        writer.close();
 +      }
 +
 +      if (newWeights != null && columnID != newWeights.length) {
 +        throw new IllegalArgumentException("number of models does not match number of weights");
 +      }
 +
 +    } catch (IOException e) {
 +      e.printStackTrace();
 +    }
 +  }
 +
 +  // ===============================================================
 +  // Initialization Methods
 +  // ===============================================================
 +
 +  /**
 +   * Moses requires the pattern .*_.* for sparse features, and prohibits underscores in dense features. 
 +   * This conforms to that pattern. We assume non-conforming dense features start with tm_ or lm_,
 +   * and the only sparse feature that needs converting is OOVPenalty.
 +   *
 +   * @param feature
 +   * @return the feature in Moses format
 +   */
 +  private String mosesize(String feature) {
 +    if (joshuaConfiguration.moses) {
 +      if (feature.startsWith("tm_") || feature.startsWith("lm_"))
 +        return feature.replace("_", "-");
 +    }
 +
 +    return feature;
 +  }
 +
 +  /**
 +   * Initialize all parts of the JoshuaDecoder.
 +   *
 +   * @param configFile File containing configuration options
 +   * @return An initialized decoder
 +   */
 +  public Decoder initialize(String configFile) {
 +    try {
 +
 +      long pre_load_time = System.currentTimeMillis();
 +
 +      /* Weights can be listed in a separate file (denoted by parameter "weights-file") or directly
 +       * in the Joshua config file. Config file values take precedence.
 +       */
 +      this.readWeights(joshuaConfiguration.weights_file);
 +      
 +      
 +      /* Add command-line-passed weights to the weights array for processing below */
 +      if (!Strings.isNullOrEmpty(joshuaConfiguration.weight_overwrite)) {
 +        String[] tokens = joshuaConfiguration.weight_overwrite.split("\\s+");
 +        for (int i = 0; i < tokens.length; i += 2) {
 +          String feature = tokens[i];
 +          float value = Float.parseFloat(tokens[i+1]);
 +
 +          if (joshuaConfiguration.moses)
 +            feature = demoses(feature);
 +
 +          joshuaConfiguration.weights.add(String.format("%s %s", feature, tokens[i+1]));
 +          LOG.info("COMMAND LINE WEIGHT: {} -> {}", feature, value);
 +        }
 +      }
 +
 +      /* Read the weights found in the config file */
 +      for (String pairStr: joshuaConfiguration.weights) {
 +        String pair[] = pairStr.split("\\s+");
 +
 +        /* Sanity check for old-style unsupported feature invocations. */
 +        if (pair.length != 2) {
 +          StringBuilder errMsg = new StringBuilder();
 +          errMsg.append("FATAL: Invalid feature weight line found in config file.\n");
 +          errMsg.append(String.format("The line was '%s'\n", pairStr));
 +          errMsg.append("You might be using an old version of the config file that is no longer supported\n");
 +          errMsg.append("Check joshua-decoder.org or email joshua_support@googlegroups.com for help\n");
 +          errMsg.append("Code = " + 17);
 +          throw new RuntimeException(errMsg.toString());
 +        }
 +
 +        weights.set(pair[0], Float.parseFloat(pair[1]));
 +      }
 +
 +      LOG.info("Read {} weights ({} of them dense)", weights.size(), DENSE_FEATURE_NAMES.size());
 +
 +      // Do this before loading the grammars and the LM.
 +      this.featureFunctions = new ArrayList<FeatureFunction>();
 +
 +      // Initialize and load grammars. This must happen first, since the vocab gets defined by
 +      // the packed grammar (if any)
 +      this.initializeTranslationGrammars();
 +      LOG.info("Grammar loading took: {} seconds.",
 +          (System.currentTimeMillis() - pre_load_time) / 1000);
 +
 +      // Initialize the features: requires that LM model has been initialized.
 +      this.initializeFeatureFunctions();
 +
 +      // This is mostly for compatibility with the Moses tuning script
 +      if (joshuaConfiguration.show_weights_and_quit) {
 +        for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
 +          String name = DENSE_FEATURE_NAMES.get(i);
 +          if (joshuaConfiguration.moses)
 +            System.out.println(String.format("%s= %.5f", mosesize(name), weights.getDense(i)));
 +          else
 +            System.out.println(String.format("%s %.5f", name, weights.getDense(i)));
 +        }
 +        System.exit(0);
 +      }
 +
 +      // Sort the TM grammars (needed to do cube pruning)
 +      if (joshuaConfiguration.amortized_sorting) {
 +        LOG.info("Grammar sorting happening lazily on-demand.");
 +      } else {
 +        long pre_sort_time = System.currentTimeMillis();
 +        for (Grammar grammar : this.grammars) {
 +          grammar.sortGrammar(this.featureFunctions);
 +        }
 +        LOG.info("Grammar sorting took {} seconds.",
 +            (System.currentTimeMillis() - pre_sort_time) / 1000);
 +      }
 +
 +      // Create the threads
 +      for (int i = 0; i < joshuaConfiguration.num_parallel_decoders; i++) {
 +        this.threadPool.put(new DecoderThread(this.grammars, Decoder.weights,
 +            this.featureFunctions, joshuaConfiguration));
 +      }
 +    } catch (IOException | InterruptedException e) {
 +      LOG.warn(e.getMessage(), e);
 +    }
 +
 +    return this;
 +  }
 +
 +  /**
 +   * Initializes translation grammars. Retained for backward compatibility.
 +   *
 +   * A local ownersSeen set records which owners already have a PhraseModel feature function (one
 +   * is needed for each owner).
 +   * @throws IOException
 +   */
 +  private void initializeTranslationGrammars() throws IOException {
 +
 +    if (joshuaConfiguration.tms.size() > 0) {
 +
 +      // collect packedGrammars to check if they use a shared vocabulary
 +      final List<PackedGrammar> packed_grammars = new ArrayList<>();
 +
 +      // tm = {thrax/hiero,packed,samt,moses} OWNER LIMIT FILE
 +      for (String tmLine : joshuaConfiguration.tms) {
 +
 +        String type = tmLine.substring(0,  tmLine.indexOf(' '));
 +        String[] args = tmLine.substring(tmLine.indexOf(' ')).trim().split("\\s+");
 +        HashMap<String, String> parsedArgs = FeatureFunction.parseArgs(args);
 +
 +        String owner = parsedArgs.get("owner");
 +        int span_limit = Integer.parseInt(parsedArgs.get("maxspan"));
 +        String path = parsedArgs.get("path");
 +
 +        Grammar grammar = null;
 +        if (! type.equals("moses") && ! type.equals("phrase")) {
 +          if (new File(path).isDirectory()) {
 +            try {
 +              PackedGrammar packed_grammar = new PackedGrammar(path, span_limit, owner, type, joshuaConfiguration);
 +              packed_grammars.add(packed_grammar);
 +              grammar = packed_grammar;
 +            } catch (FileNotFoundException e) {
 +              String msg = String.format("Couldn't load packed grammar from '%s'", path)
 +                  + "Perhaps it doesn't exist, or it may be an old packed file format.";
 +              throw new RuntimeException(e);
 +            }
 +          } else {
 +            // thrax, hiero, samt
 +            grammar = new MemoryBasedBatchGrammar(type, path, owner,
 +                joshuaConfiguration.default_non_terminal, span_limit, joshuaConfiguration);
 +          }
 +
 +        } else {
 +
 +          int maxSourceLen = parsedArgs.containsKey("max-source-len")
 +              ? Integer.parseInt(parsedArgs.get("max-source-len"))
 +              : -1;
 +
 +          joshuaConfiguration.search_algorithm = "stack";
 +          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
 +        }
 +
 +        this.grammars.add(grammar);
 +      }
 +
 +      checkSharedVocabularyChecksumsForPackedGrammars(packed_grammars);
 +
 +    } else {
 +      LOG.warn("no grammars supplied!  Supplying dummy glue grammar.");
 +      MemoryBasedBatchGrammar glueGrammar = new MemoryBasedBatchGrammar("glue", joshuaConfiguration);
 +      glueGrammar.setSpanLimit(-1);
 +      glueGrammar.addGlueRules(featureFunctions);
 +      this.grammars.add(glueGrammar);
 +    }
 +    
 +    /* Add the grammar for custom entries */
 +    this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
 +    this.grammars.add(this.customPhraseTable);
 +    
 +    /* Create an epsilon-deleting grammar */
 +    if (joshuaConfiguration.lattice_decoding) {
 +      LOG.info("Creating an epsilon-deleting grammar");
 +      MemoryBasedBatchGrammar latticeGrammar = new MemoryBasedBatchGrammar("lattice", joshuaConfiguration);
 +      latticeGrammar.setSpanLimit(-1);
 +      HieroFormatReader reader = new HieroFormatReader();
 +
 +      String goalNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.goal_symbol);
 +      String defaultNT = FormatUtils.cleanNonTerminal(joshuaConfiguration.default_non_terminal);
 +
 +      //FIXME: too many arguments
 +      String ruleString = String.format("[%s] ||| [%s,1] <eps> ||| [%s,1] ||| ", goalNT, goalNT, defaultNT,
 +          goalNT, defaultNT);
 +
 +      Rule rule = reader.parseLine(ruleString);
 +      latticeGrammar.addRule(rule);
 +      rule.estimateRuleCost(featureFunctions);
 +
 +      this.grammars.add(latticeGrammar);
 +    }
 +
 +    /* Now create a feature function for each owner */
 +    HashSet<String> ownersSeen = new HashSet<String>();
 +
 +    for (Grammar grammar: this.grammars) {
 +      String owner = Vocabulary.word(grammar.getOwner());
 +      if (! ownersSeen.contains(owner)) {
 +        this.featureFunctions.add(new PhraseModel(weights, new String[] { "tm", "-owner", owner },
 +            joshuaConfiguration, grammar));
 +        ownersSeen.add(owner);
 +      }
 +    }
 +
 +    LOG.info("Memory used {} MB",
 +        ((Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()) / 1000000.0));
 +  }
 +
 +  /**
 +   * Checks if multiple packedGrammars have the same vocabulary by comparing their vocabulary file checksums.
 +   */
 +  private static void checkSharedVocabularyChecksumsForPackedGrammars(final List<PackedGrammar> packed_grammars) {
 +    String previous_checksum = "";
 +    for (PackedGrammar grammar : packed_grammars) {
 +      final String checksum = grammar.computeVocabularyChecksum();
 +      if (previous_checksum.isEmpty()) {
 +        previous_checksum = checksum;
 +      } else {
 +        if (!checksum.equals(previous_checksum)) {
 +          throw new RuntimeException(
 +              "Trying to load multiple packed grammars with different vocabularies!" +
 +                  "Have you packed them jointly?");
 +        }
 +        previous_checksum = checksum;
 +      }
 +    }
 +  }
 +
 +  /*
 +   * This function reads the weights for the model. Feature names and their weights are listed one
 +   * per line in the following format:
 +   * 
 +   * FEATURE_NAME WEIGHT
 +   */
 +  private void readWeights(String fileName) {
 +    Decoder.weights = new FeatureVector();
 +
 +    if (fileName.equals(""))
 +      return;
 +
 +    try {
 +      LineReader lineReader = new LineReader(fileName);
 +
 +      for (String line : lineReader) {
 +        line = line.replaceAll("\\s+", " ");
 +
 +        if (line.equals("") || line.startsWith("#") || line.startsWith("//")
 +            || line.indexOf(' ') == -1)
 +          continue;
 +
 +        String tokens[] = line.split("\\s+");
 +        String feature = tokens[0];
 +        Float value = Float.parseFloat(tokens[1]);
 +
 +        // Kludge for compatibility with Moses tuners
 +        if (joshuaConfiguration.moses) {
 +          feature = demoses(feature);
 +        }
 +
 +        weights.increment(feature, value);
 +      }
 +    } catch (IOException ioe) {
 +      throw new RuntimeException(ioe);
 +    }
 +    LOG.info("Read {} weights from file '{}'", weights.size(), fileName);
 +  }
 +
 +  private String demoses(String feature) {
 +    if (feature.endsWith("="))
 +      feature = feature.replace("=", "");
 +    if (feature.equals("OOV_Penalty"))
 +      feature = "OOVPenalty";
 +    else if (feature.startsWith("tm-") || feature.startsWith("lm-"))
 +      feature = feature.replace("-",  "_");
 +    return feature;
 +  }
 +
 +  /**
 +   * Feature functions are instantiated with a line of the form
 +   *
 +   * <pre>
-    *   feature_function = FEATURE OPTIONS
++   *   FEATURE OPTIONS
 +   * </pre>
 +   *
 +   * Weights for features are listed separately.
 +   *
 +   * @throws IOException
 +   *
 +   */
 +  private void initializeFeatureFunctions() throws IOException {
 +
 +    for (String featureLine : joshuaConfiguration.features) {
-       // feature-function = NAME args
++      // line starts with NAME, followed by args
 +      // 1. create new class named NAME, pass it config, weights, and the args
 +
-       // Get rid of the leading crap.
-       featureLine = featureLine.replaceFirst("^feature_function\\s*=\\s*", "");
- 
 +      String fields[] = featureLine.split("\\s+");
 +      String featureName = fields[0];
++      
 +      try {
++        
 +        Class<?> clas = getClass(featureName);
 +        Constructor<?> constructor = clas.getConstructor(FeatureVector.class,
 +            String[].class, JoshuaConfiguration.class);
-         this.featureFunctions.add((FeatureFunction) constructor.newInstance(weights, fields, joshuaConfiguration));
++        FeatureFunction feature = (FeatureFunction) constructor.newInstance(weights, fields, joshuaConfiguration);
++        this.featureFunctions.add(feature);
++        
 +      } catch (Exception e) {
-         e.printStackTrace();
-         throw new RuntimeException("* FATAL: could not find a feature '" + featureName + "'");
++        throw new RuntimeException(String.format("Unable to instantiate feature function '%s'!", featureLine), e); 
 +      }
 +    }
 +
 +    for (FeatureFunction feature : featureFunctions) {
 +      LOG.info("FEATURE: {}", feature.logString());
- 
 +    }
 +
 +    weights.registerDenseFeatures(featureFunctions);
 +  }
 +
 +  /**
 +   * Searches a list of predefined paths for classes, and returns the first one found. Meant for
 +   * instantiating feature functions.
 +   *
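 +   * @param featureName the feature class name, tried as given and then with "FF" appended
 +   * @return the class, found in one of the search paths, or null if none contains it (a failed
 +   *         lookup is not reported as a ClassNotFoundException)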
 +   */
 +  private Class<?> getClass(String featureName) {
 +    Class<?> clas = null;
 +
 +    String[] packages = { "org.apache.joshua.decoder.ff", "org.apache.joshua.decoder.ff.lm", "org.apache.joshua.decoder.ff.phrase" };
 +    for (String path : packages) {
 +      try {
 +        clas = Class.forName(String.format("%s.%s", path, featureName));
 +        break;
 +      } catch (ClassNotFoundException e) {
 +        try {
 +          clas = Class.forName(String.format("%s.%sFF", path, featureName));
 +          break;
 +        } catch (ClassNotFoundException e2) {
 +          // do nothing
 +        }
 +      }
 +    }
 +    return clas;
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5c0d5388/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
index 5acfd7e,0000000..dd7bafb
mode 100644,000000..100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@@ -1,712 -1,0 +1,712 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder;
 +
 +import static org.apache.joshua.util.FormatUtils.cleanNonTerminal;
 +import static org.apache.joshua.util.FormatUtils.ensureNonTerminalBrackets;
 +
 +import java.io.File;
 +import java.io.FileWriter;
 +import java.io.IOException;
 +import java.io.PrintWriter;
 +import java.io.BufferedReader;
 +import java.io.FileReader;
 +import java.util.ArrayList;
 +import java.util.Collections;
 +
 +import org.apache.joshua.decoder.ff.StatefulFF;
 +import org.apache.joshua.decoder.ff.fragmentlm.Tree;
 +import org.apache.joshua.util.FormatUtils;
 +import org.apache.joshua.util.Regex;
 +import org.apache.joshua.util.io.LineReader;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +/**
 + * Configuration file for Joshua decoder.
 + *
 + * When adding new features to Joshua, any new configurable parameters should be added to this
 + * class.
 + *
 + * @author Zhifei Li, zhifei.work@gmail.com
 + * @author Matt Post post@cs.jhu.edu
 + */
 +public class JoshuaConfiguration {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(JoshuaConfiguration.class);
 +
 +  // whether to construct a StructuredTranslation object for each request instead of
 +  // printing to stdout. Used when the Decoder is used from Java directly.
 +  public Boolean use_structured_output = false;
 +
 +  // If set to true, Joshua will lowercase the input, creating an annotation that marks the
 +  // original case
 +  public boolean lowercase = false;
 +
 +  // If set to true, Joshua will recapitalize the output by projecting the case from aligned
 +  // source-side words
 +  public boolean project_case = false;
 +
 +  // List of grammar files to read
 +  public ArrayList<String> tms = new ArrayList<String>();
 +
 +  // A rule cache for commonly used tries to avoid excess object allocations
 +  // Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
 +  public Integer cachedRuleSize = new Integer(5000);
 +
 +  /*
 +   * The file to read the weights from (part of the sparse features implementation). Weights can
 +   * also just be listed in the main config file.
 +   */
 +  public String weights_file = "";
 +  // Default symbols. The symbol here should be enclosed in square brackets.
 +  public String default_non_terminal = FormatUtils.ensureNonTerminalBrackets("X");
 +  public String goal_symbol = FormatUtils.ensureNonTerminalBrackets("GOAL");
 +
 +  /*
 +   * A list of OOV symbols in the form
 +   *
 +   * [X1] weight [X2] weight [X3] weight ...
 +   *
 +   * where the [X] symbols are nonterminals and the weights are weights. For each OOV word w in the
 +   * input sentence, Joshua will create rules of the form
 +   *
 +   * X1 -> w (weight)
 +   *
 +   * If this is empty, an unweighted default_non_terminal is used.
 +   */
 +  public class OOVItem implements Comparable<OOVItem> {
 +    public String label;
 +
 +    public float weight;
 +
 +    OOVItem(String l, float w) {
 +      label = l;
 +      weight = w;
 +    }
 +    @Override
 +    public int compareTo(OOVItem other) {
 +      if (weight > other.weight)
 +        return -1;
 +      else if (weight < other.weight)
 +        return 1;
 +      return 0;
 +    }
 +  }
 +
 +  public ArrayList<OOVItem> oovList = null;
 +
 +  /*
 +   * Whether to segment OOVs into a lattice
 +   */
 +  public boolean segment_oovs = false;
 +
 +  /*
 +   * Enable lattice decoding.
 +   */
 +  public boolean lattice_decoding = false;
 +
 +  /*
 +   * If false, sorting of the complete grammar is done at load time. If true, grammar tries are not
 +   * sorted till they are first accessed. Amortized sorting means you get your first translation
 +   * much, much quicker (good for debugging), but that per-sentence decoding is a bit slower.
 +   */
 +  public boolean amortized_sorting = true;
 +  // syntax-constrained decoding
 +  public boolean constrain_parse = false;
 +
 +  public boolean use_pos_labels = false;
 +
 +  // oov-specific
 +  public boolean true_oovs_only = false;
 +
 +  /* Dynamic sentence-level filtering. */
 +  public boolean filter_grammar = false;
 +
 +  /* The cube pruning pop limit. Set to 0 for exhaustive pruning. */
 +  public int pop_limit = 100;
 +
 +  /* Maximum sentence length. Sentences longer than this are truncated. */
 +  public int maxlen = 200;
 +
 +  /*
 +   * N-best configuration.
 +   */
 +  // Make sure output strings in the n-best list are unique.
 +  public boolean use_unique_nbest = true;
 +
 +  /* Include the phrasal alignments in the output (not word-level alignments at the moment). */
 +  public boolean include_align_index = false;
 +
 +  /* The number of hypotheses to output by default. */
 +  public int topN = 1;
 +
 +  /**
 +   * This string describes the format of each line of output from the decoder (i.e., the
 +   * translations). The string can include arbitrary text and also variables. The following
 +   * variables are available:
 +   *
 +   * <pre>
 +   * - %i the 0-indexed sentence number
 +   * - %e the source string %s the translated sentence
 +   * - %S the translated sentence with some basic capitalization and denormalization
 +   * - %t the synchronous derivation
 +   * - %f the list of feature values (as name=value pairs)
 +   * - %c the model cost
 +   * - %w the weight vector
 +   * - %a the alignments between source and target words (currently unimplemented)
 +   * - %d a verbose, many-line version of the derivation
 +   * </pre>
 +   */
 +  public String outputFormat = "%i ||| %s ||| %f ||| %c";
 +
 +  /* The number of decoding threads to use (-threads). */
 +  public int num_parallel_decoders = 1;
 +
 +  // disk hg
 +  public String hypergraphFilePattern = "";
 +
 +  /*
 +   * When true, _OOV is appended to all words that are passed through (useful for something like
 +   * transliteration on the target side).
 +   */
 +  public boolean mark_oovs = false;
 +
 +  /* Enables synchronous parsing. */
 +  public boolean parse = false; // perform synchronous parsing
 +
 +
 +  /* A list of the feature functions. */
 +  public ArrayList<String> features = new ArrayList<String>();
 +
 +  /* A list of weights found in the main config file (instead of in a separate weights file) */
 +  public ArrayList<String> weights = new ArrayList<String>();
 +
 +  /* Determines whether to expect JSON input or plain lines */
 +  public enum INPUT_TYPE { plain, json };
 +  public INPUT_TYPE input_type = INPUT_TYPE.plain;
 +
 +  /* Type of server. Not sure we need to keep the regular TCP one around. */
 +  public enum SERVER_TYPE { none, TCP, HTTP };
 +  public SERVER_TYPE server_type = SERVER_TYPE.TCP;
 +
 +  /* If set, Joshua will start a (multi-threaded, per "threads") TCP/IP server on this port. */
 +  public int server_port = 0;
 +
 +  /*
 +   * Whether to do forest rescoring. If set to true, the references are expected on STDIN along with
 +   * the input sentences in the following format:
 +   * 
 +   * input sentence ||| ||| reference1 ||| reference2 ...
 +   * 
 +   * (The second field is reserved for the output sentence for alignment and forced decoding).
 +   */
 +
 +  public boolean rescoreForest = false;
 +  public float rescoreForestWeight = 10.0f;
 +
 +  /*
 +   * Location of fragment mapping file, which maps flattened SCFG rules to their internal
 +   * representation.
 +   */
 +  public String fragmentMapFile = null;
 +
 +  /*
 +   * Whether to use soft syntactic constraint decoding /fuzzy matching, which allows that any
 +   * nonterminal may be substituted for any other nonterminal (except for OOV and GOAL)
 +   */
 +  public boolean fuzzy_matching = false;
 +
 +  public static final String SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME = "fuzzy_matching";
 +
 +  /***
 +   * Phrase-based decoding parameters.
 +   */
 +  
 +  /* The search algorithm: currently either "cky" or "stack" */
 +  public String search_algorithm = "cky";
 +
 +  /* The distortion limit */
 +  public int reordering_limit = 8;
 +
 +  /* The number of target sides considered for each source side (after sorting by model weight) */
 +  public int num_translation_options = 20;
 +
 +  /* If true, decode using a dot chart (standard CKY+); if false, use the much more efficient
 +   * version of Sennrich (SSST 2014)
 +   */
 +  public boolean use_dot_chart = true;
 +
 +  /* Moses compatibility */
 +  public boolean moses = false;
 +
 +  /* If true, just print out the weights found in the config file, and exit. */
 +  public boolean show_weights_and_quit = false;
 +
 +  /* Read input from a file (Moses compatible flag) */
 +  public String input_file = null;
 +
 +  /* Write n-best output to this file */
 +  public String n_best_file = null;
 +
 +  /* Whether to look at source side for special annotations */
 +  public boolean source_annotations = false;
 +
 +  /* Weights overridden from the command line */
 +  public String weight_overwrite = "";
 +
 +  /**
 +   * This method resets the state of JoshuaConfiguration back to the state after initialization.
 +   * This is useful when for example making different calls to the decoder within the same java
 +   * program, which otherwise leads to potential errors due to inconsistent state as a result of
 +   * loading the configuration multiple times without resetting etc.
 +   *
 +   * This leads to the insight that in fact it may be an even better idea to refactor the code and
 +   * make JoshuaConfiguration an object that is created and passed as an argument, rather than a
 +   * shared static object. This is just a suggestion for the next step.
 +   *
 +   */
 +  public void reset() {
 +    LOG.info("Resetting the JoshuaConfiguration to its defaults ...");
 +    LOG.info("\n\tResetting the StatefullFF global state index ...");
 +    LOG.info("\n\t...done");
 +    StatefulFF.resetGlobalStateIndex();
 +    tms = new ArrayList<String>();
 +    weights_file = "";
 +    default_non_terminal = "[X]";
 +    oovList = new ArrayList<OOVItem>();
 +    oovList.add(new OOVItem(default_non_terminal, 1.0f));
 +    goal_symbol = "[GOAL]";
 +    amortized_sorting = true;
 +    constrain_parse = false;
 +    use_pos_labels = false;
 +    true_oovs_only = false;
 +    filter_grammar = false;
 +    pop_limit = 100;
 +    maxlen = 200;
 +    use_unique_nbest = false;
 +    include_align_index = false;
 +    topN = 1;
 +    outputFormat = "%i ||| %s ||| %f ||| %c";
 +    num_parallel_decoders = 1;
 +    hypergraphFilePattern = "";
 +    mark_oovs = false;
 +    // oracleFile = null;
 +    parse = false; // perform synchronous parsing
 +    features = new ArrayList<String>();
 +    weights = new ArrayList<String>();
 +    server_port = 0;
 +
 +    reordering_limit = 8;
 +    num_translation_options = 20;
 +    LOG.info("...done");
 +  }
 +
 +  // ===============================================================
 +  // Methods
 +  // ===============================================================
 +
 +  /**
 +   * To process command-line options, we write them to a file that looks like the config file, and
 +   * then call readConfigFile() on it. It would be more general to define a class that sits on a
 +   * stream and knows how to chop it up, but this was quicker to implement.
 +   * 
 +   * @param options string array of command line options
 +   */
 +  public void processCommandLineOptions(String[] options) {
 +    try {
 +      File tmpFile = File.createTempFile("options", null, null);
 +      PrintWriter out = new PrintWriter(new FileWriter(tmpFile));
 +
 +      for (int i = 0; i < options.length; i++) {
 +        String key = options[i].substring(1);
 +        if (i + 1 == options.length || options[i + 1].startsWith("-")) {
 +          // if this is the last item, or if the next item
 +          // is another flag, then this is a boolean flag
 +          out.println(key + " = true");
 +
 +        } else {
 +          out.print(key + " =");
 +          while (i + 1 < options.length && ! options[i + 1].startsWith("-")) {
 +            out.print(String.format(" %s", options[i + 1]));
 +            i++;
 +          }
 +          out.println();
 +        }
 +      }
 +      out.close();
 +      this.readConfigFile(tmpFile.getCanonicalPath());
 +
 +      tmpFile.delete();
 +
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    }
 +  }
 +
 +  public void readConfigFile(String configFile) throws IOException {
 +
 +    LineReader configReader = new LineReader(configFile, false);
 +    try {
 +      for (String line : configReader) {
 +        line = line.trim(); // .toLowerCase();
 +
 +        if (Regex.commentOrEmptyLine.matches(line))
 +          continue;
 +
 +        /*
 +         * There are two kinds of substantive (non-comment, non-blank) lines: parameters and feature
 +         * values. Parameters match the pattern "key = value"; all other substantive lines are
 +         * interpreted as features.
 +         */
 +
 +        if (line.indexOf("=") != -1) { // parameters; (not feature function)
 +          String[] fds = Regex.equalsWithSpaces.split(line, 2);
 +          if (fds.length < 2) {
 +            LOG.warn("skipping config file line '{}'", line);
 +            continue;
 +          }
 +
 +          String parameter = normalize_key(fds[0]);
 +
 +          if (parameter.equals(normalize_key("lm"))) {
 +            /* This is deprecated. This supports old LM lines of the form
 +             * 
 +             *   lm = berkeleylm 5 false false 100 lm.gz
 +             * 
 +             * LMs are now loaded as general feature functions, so we transform that to either
 +             * 
-              *   feature-function = LanguageModel -lm_order 5 -lm_type berkeleylm -lm_file lm.gz
++             *   LanguageModel -lm_order 5 -lm_type berkeleylm -lm_file lm.gz
 +             * 
 +             * If the line were state minimizing:
 +             * 
 +             *   lm = kenlm 5 true false 100 lm.gz
 +             *              
-              * feature-function = StateMinimizingLanguageModel -lm_order 5 -lm_file lm.gz
++             * StateMinimizingLanguageModel -lm_order 5 -lm_file lm.gz
 +             */
 +
 +            String[] tokens = fds[1].split("\\s+");
 +            if (tokens[2].equals("true"))
-               features.add(String.format("feature_function = StateMinimizingLanguageModel -lm_type kenlm -lm_order %s -lm_file %s",
++              features.add(String.format("StateMinimizingLanguageModel -lm_type kenlm -lm_order %s -lm_file %s",
 +                  tokens[1], tokens[5]));
 +            else
-               features.add(String.format("feature_function = LanguageModel -lm_type %s -lm_order %s -lm_file %s",
++              features.add(String.format("LanguageModel -lm_type %s -lm_order %s -lm_file %s",
 +                  tokens[0], tokens[1], tokens[5]));
 +
 +          } else if (parameter.equals(normalize_key("tm"))) {
 +            /* If found, convert old format:
 +             *   tm = TYPE OWNER MAXSPAN PATH
 +             * to new format
 +             *   tm = TYPE -owner OWNER -maxspan MAXSPAN -path PATH    
 +             */
 +            String tmLine = fds[1];
 +
 +            String[] tokens = fds[1].split("\\s+");
 +            if (! tokens[1].startsWith("-")) { // old format
 +              tmLine = String.format("%s -owner %s -maxspan %s -path %s", tokens[0], tokens[1], tokens[2], tokens[3]);
 +              LOG.warn("Converting deprecated TM line from '{}' -> '{}'", fds[1], tmLine);
 +            }
 +            tms.add(tmLine);
 +
 +          } else if (parameter.equals("v")) {
 +            Decoder.VERBOSE = Integer.parseInt(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("parse"))) {
 +            parse = Boolean.parseBoolean(fds[1]);
 +            LOG.debug("parse: {}", parse);
 +
 +          } else if (parameter.equals(normalize_key("dump-hypergraph"))) {
 +            hypergraphFilePattern = fds[1].trim();
 +            LOG.debug("  hypergraph dump file format: {}", hypergraphFilePattern);
 +
 +          } else if (parameter.equals(normalize_key("oov-list"))) {
 +            if (new File(fds[1]).exists()) {
 +              oovList = new ArrayList<OOVItem>();
 +              try {
 +                File file = new File(fds[1]);
 +                BufferedReader br = new BufferedReader(new FileReader(file));
 +                try {
 +                  String str = br.readLine();
 +                  while (str != null) {
 +                    String[] tokens = str.trim().split("\\s+");
 +
 +                    oovList.add(new OOVItem(FormatUtils.ensureNonTerminalBrackets(tokens[0]),
 +                            (float) Math.log(Float.parseFloat(tokens[1]))));
 +
 +                    str = br.readLine();
 +                  }
 +                  br.close();
 +                } catch(IOException e){
 +                  System.out.println(e);
 +                }
 +              } catch(IOException e){
 +                System.out.println(e);
 +              }
 +              Collections.sort(oovList);
 +
 +            } else {
 +              String[] tokens = fds[1].trim().split("\\s+");
 +              if (tokens.length % 2 != 0) {
 +                throw new RuntimeException(String.format("* FATAL: invalid format for '%s'", fds[0]));
 +              }
 +              oovList = new ArrayList<OOVItem>();
 +
 +              for (int i = 0; i < tokens.length; i += 2)
 +                oovList.add(new OOVItem(FormatUtils.ensureNonTerminalBrackets(tokens[i]),
 +                    (float) Math.log(Float.parseFloat(tokens[i + 1]))));
 +
 +              Collections.sort(oovList);
 +            }
 +
 +          } else if (parameter.equals(normalize_key("lattice-decoding"))) {
 +            lattice_decoding = true;
 +
 +          } else if (parameter.equals(normalize_key("segment-oovs"))) {
 +            segment_oovs = true;
 +            lattice_decoding = true;
 +
 +          } else if (parameter.equals(normalize_key("default-non-terminal"))) {
 +            default_non_terminal = ensureNonTerminalBrackets(cleanNonTerminal(fds[1].trim()));
 +            LOG.debug("default_non_terminal: {}", default_non_terminal);
 +
 +          } else if (parameter.equals(normalize_key("goal-symbol"))) {
 +            goal_symbol = ensureNonTerminalBrackets(cleanNonTerminal(fds[1].trim()));
 +            LOG.debug("goalSymbol: {}", goal_symbol);
 +
 +          } else if (parameter.equals(normalize_key("weights-file"))) {
 +            weights_file = fds[1];
 +
 +          } else if (parameter.equals(normalize_key("constrain_parse"))) {
 +            constrain_parse = Boolean.parseBoolean(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("true_oovs_only"))) {
 +            true_oovs_only = Boolean.parseBoolean(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("filter-grammar"))) {
 +            filter_grammar = Boolean.parseBoolean(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("amortize"))) {
 +            amortized_sorting = Boolean.parseBoolean(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("use_pos_labels"))) {
 +            use_pos_labels = Boolean.parseBoolean(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("use_unique_nbest"))) {
 +            use_unique_nbest = Boolean.valueOf(fds[1]);
 +            LOG.debug("use_unique_nbest: {}", use_unique_nbest);
 +
 +          } else if (parameter.equals(normalize_key("output-format"))) {
 +            outputFormat = fds[1];
 +            LOG.debug("output-format: {}", outputFormat);
 +
 +          } else if (parameter.equals(normalize_key("include_align_index"))) {
 +            include_align_index = Boolean.valueOf(fds[1]);
 +            LOG.debug("include_align_index: {}", include_align_index);
 +
 +          } else if (parameter.equals(normalize_key("top_n"))) {
 +            topN = Integer.parseInt(fds[1]);
 +            LOG.debug("topN: {}", topN);
 +
 +          } else if (parameter.equals(normalize_key("num_parallel_decoders"))
 +              || parameter.equals(normalize_key("threads"))) {
 +            num_parallel_decoders = Integer.parseInt(fds[1]);
 +            if (num_parallel_decoders <= 0) {
 +              throw new IllegalArgumentException(
 +                  "Must specify a positive number for num_parallel_decoders");
 +            }
 +            LOG.debug("num_parallel_decoders: {}", num_parallel_decoders);
 +
 +          } else if (parameter.equals(normalize_key("mark_oovs"))) {
 +            mark_oovs = Boolean.valueOf(fds[1]);
 +            LOG.debug("mark_oovs: {}", mark_oovs);
 +
 +          } else if (parameter.equals(normalize_key("pop-limit"))) {
 +            pop_limit = Integer.parseInt(fds[1]);
 +            LOG.info("pop-limit: {}", pop_limit);
 +
 +          } else if (parameter.equals(normalize_key("input-type"))) {
 +            if (fds[1].equals("json")) {
 +              input_type = INPUT_TYPE.json;
 +            } else if (fds[1].equals("plain")) {
 +              input_type = INPUT_TYPE.plain;
 +            } else {
 +              throw new RuntimeException(String.format("* FATAL: invalid server type '%s'", fds[1]));
 +            }
 +            LOG.info("    input-type: {}", input_type);
 +
 +          } else if (parameter.equals(normalize_key("server-type"))) {
 +            if (fds[1].toLowerCase().equals("tcp"))
 +              server_type = SERVER_TYPE.TCP;
 +            else if (fds[1].toLowerCase().equals("http"))
 +              server_type = SERVER_TYPE.HTTP;
 +
 +            LOG.info("    server-type: {}", server_type);
 +
 +          } else if (parameter.equals(normalize_key("server-port"))) {
 +            server_port = Integer.parseInt(fds[1]);
 +            LOG.info("    server-port: {}", server_port);
 +
 +          } else if (parameter.equals(normalize_key("rescore-forest"))) {
 +            rescoreForest = true;
 +            LOG.info("    rescore-forest: {}", rescoreForest);
 +
 +          } else if (parameter.equals(normalize_key("rescore-forest-weight"))) {
 +            rescoreForestWeight = Float.parseFloat(fds[1]);
 +            LOG.info("    rescore-forest-weight: {}", rescoreForestWeight);
 +
 +          } else if (parameter.equals(normalize_key("maxlen"))) {
 +            // reset the maximum length
 +            maxlen = Integer.parseInt(fds[1]);
 +
 +          } else if (parameter.equals("c") || parameter.equals("config")) {
 +            // this was used to send in the config file, just ignore it
 +            ;
 +
 +          } else if (parameter.equals(normalize_key("feature-function"))) {
 +            // add the feature to the list of features for later processing
-             features.add("feature_function = " + fds[1]);
++            features.add(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("maxlen"))) {
 +            // NOTE: unreachable duplicate of the "maxlen" branch above; it also sets maxlen
 +            maxlen = Integer.parseInt(fds[1]);
 +
 +          } else if (parameter
 +              .equals(normalize_key(SOFT_SYNTACTIC_CONSTRAINT_DECODING_PROPERTY_NAME))) {
 +            fuzzy_matching = Boolean.parseBoolean(fds[1]);
 +            LOG.debug("fuzzy_matching: {}", fuzzy_matching);
 +
 +          } else if (parameter.equals(normalize_key("fragment-map"))) {
 +            fragmentMapFile = fds[1];
 +            Tree.readMapping(fragmentMapFile);
 +
 +            /** PHRASE-BASED PARAMETERS **/
 +          } else if (parameter.equals(normalize_key("search"))) {
 +            search_algorithm = fds[1];
 +
 +            if (!search_algorithm.equals("cky") && !search_algorithm.equals("stack")) {
 +              throw new RuntimeException(
 +                  "-search must be one of 'stack' (for phrase-based decoding) " +
 +                      "or 'cky' (for hierarchical / syntactic decoding)");
 +            }
 +
 +            if (search_algorithm.equals("cky") && include_align_index) {
 +              throw new RuntimeException(
 +                  "include_align_index is currently not supported with cky search");
 +            }
 +
 +          } else if (parameter.equals(normalize_key("reordering-limit"))) {
 +            reordering_limit = Integer.parseInt(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("num-translation-options"))) {
 +            num_translation_options = Integer.parseInt(fds[1]);
 +
 +          } else if (parameter.equals(normalize_key("no-dot-chart"))) {
 +            use_dot_chart = false;
 +
 +          } else if (parameter.equals(normalize_key("moses"))) {
 +            moses = true; // triggers some Moses-specific compatibility options
 +
 +          } else if (parameter.equals(normalize_key("show-weights"))) {
 +            show_weights_and_quit = true;
 +
 +          } else if (parameter.equals(normalize_key("n-best-list"))) {
 +            // for Moses compatibility
 +            String[] tokens = fds[1].split("\\s+");
 +            n_best_file = tokens[0];
 +            if (tokens.length > 1)
 +              topN = Integer.parseInt(tokens[1]);
 +
 +          } else if (parameter.equals(normalize_key("input-file"))) {
 +            // for Moses compatibility
 +            input_file = fds[1];
 +
 +          } else if (parameter.equals(normalize_key("weight-file"))) {
 +            // for Moses, ignore
 +
 +          } else if (parameter.equals(normalize_key("weight-overwrite"))) {
 +            weight_overwrite = fds[1];
 +
 +          } else if (parameter.equals(normalize_key("source-annotations"))) {
 +            // Check source sentence
 +            source_annotations = true;
 +
 +          } else if (parameter.equals(normalize_key("cached-rules-size"))) {
 +            // Parse the integer value into cachedRuleSize
 +            cachedRuleSize = Integer.parseInt(fds[1]);
 +          } else if (parameter.equals(normalize_key("lowercase"))) {
 +            lowercase = true;
 +
 +          } else if (parameter.equals(normalize_key("project-case"))) {
 +            project_case = true;
 +
 +          } else {
 +
 +            if (parameter.equals(normalize_key("use-sent-specific-tm"))
 +                || parameter.equals(normalize_key("add-combined-cost"))
 +                || parameter.equals(normalize_key("use-tree-nbest"))
 +                || parameter.equals(normalize_key("use-kenlm"))
 +                || parameter.equals(normalize_key("useCubePrune"))
 +                || parameter.equals(normalize_key("useBeamAndThresholdPrune"))
 +                || parameter.equals(normalize_key("regexp-grammar"))) {
 +              LOG.warn("ignoring deprecated parameter '{}'", fds[0]);
 +
 +            } else {
 +              throw new RuntimeException("FATAL: unknown configuration parameter '" + fds[0] + "'");
 +            }
 +          }
 +
 +          LOG.info("    {} = '{}'", normalize_key(fds[0]), fds[1]);
 +
 +        } else {
 +          /*
 +           * Lines that don't have an equals sign and are not blank lines, empty lines, or comments,
 +           * are feature values, which can be present in this file
 +           */
 +
 +          weights.add(line);
 +        }
 +      }
 +    } finally {
 +      configReader.close();
 +    }
 +  }
 +
 +  /**
 +   * Checks for invalid variable configurations
 +   */
 +  public void sanityCheck() {
 +  }
 +
 +  /**
 +   * Normalizes parameter names by removing underscores and hyphens and lowercasing. This defines
 +   * equivalence classes on external use of parameter names, permitting arbitrary_under_scores and
 +   * camelCasing in parameter names without forcing the user to memorize them all. Here are some
 +   * examples of equivalent ways to refer to parameter names:
 +   * <pre>
 +   * {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
 +   * </pre>
 +   * 
 +   * @param text the string to be normalized
 +   * @return normalized key
 +   * 
 +   */
 +  public static String normalize_key(String text) {
 +    return text.replaceAll("[-_]", "").toLowerCase();
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/5c0d5388/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 69584dd,0000000..e53e19f
mode 100644,000000..100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@@ -1,117 -1,0 +1,118 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff;
 +
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.JoshuaConfiguration.OOVItem;
 +import org.apache.joshua.decoder.ff.state_maintenance.DPState;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.hypergraph.HGNode;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.chart_parser.SourcePath;
 +
 +/**
 + * This feature is fired when an out-of-vocabulary word (with respect to the translation model) is
 + * entered into the chart. OOVs work in the following manner: for each word in the input that is OOV
 + * with respect to the translation model, we create a rule that pushes that word through
 + * untranslated (the suffix "_OOV" can optionally be appended according to the runtime parameter
 + * "mark-oovs"). These rules are all stored in a grammar whose owner is "oov". The OOV feature
 + * function template then fires the "OOVPenalty" feature whenever it is asked to score an OOV rule.
 + * 
 + * @author Matt Post post@cs.jhu.edu
 + */
 +public class OOVPenalty extends StatelessFF {
-   private int ownerID = -1;
++  private final int ownerID;
 +  
 +  /* The default value returned for OOVs. Can be overridden with -oov-list */
-   private float defaultValue = -100f;
-   private HashMap<Integer,Float> oovWeights = null;
++  private final float defaultValue = -100f;
++  private final HashMap<Integer,Float> oovWeights;
 +
 +  public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
 +    super(weights, "OOVPenalty", args, config);
 +
 +    ownerID = Vocabulary.id("oov");
 +    oovWeights = new HashMap<Integer,Float>();
 +    
-     if (config.oovList != null)
-       for (OOVItem item: config.oovList) 
++    if (config.oovList != null) {
++      for (OOVItem item: config.oovList) { 
 +        oovWeights.put(Vocabulary.id(item.label), item.weight);
++      }
++    }
 +  }
 +  
 +  @Override
 +  public ArrayList<String> reportDenseFeatures(int index) {
 +    denseFeatureIndex = index;
 +    
-     ArrayList<String> names = new ArrayList<String>();
++    ArrayList<String> names = new ArrayList<>(1);
 +    names.add(name);
 +    return names;
 +  }
 +
 +  /**
 +   * OOV rules cover exactly one word, and such rules belong to a grammar whose owner is "oov". Each
 +   * OOV fires the OOVPenalty feature with a value of 1, so the cost is simply the weight, which was
 +   * cached when the feature was created.
 +   */
 +  @Override
 +  public DPState compute(Rule rule, List<HGNode> tailNodes, int i, int j, SourcePath sourcePath,
 +      Sentence sentence, Accumulator acc) {
 +    
 +    if (rule != null && this.ownerID == rule.getOwner()) {
- //      acc.add(name, getValue(rule.getLHS()));
 +      acc.add(denseFeatureIndex, getValue(rule.getLHS()));
 +    }
 +
 +    return null;
 +  }
 +  
 +  /**
 +   * It's important for the OOV feature to contribute to the rule's estimated cost, so that OOV
 +   * rules (which are added for all words, not just ones without translation options) get sorted
 +   * to the bottom during cube pruning.
 +   * 
 +   * Important! estimateCost returns the *weighted* feature value.
 +   */
 +  @Override
 +  public float estimateCost(Rule rule, Sentence sentence) {
 +    if (rule != null && this.ownerID == rule.getOwner())
 +      return weights.getDense(denseFeatureIndex) * getValue(rule.getLHS());
 +    return 0.0f;
 +  }
 +  
 +  private float getValue(int lhs) {
 +    return oovWeights.containsKey(lhs) ? oovWeights.get(lhs) : defaultValue;
 +  }
 +
 +  @Override
 +  public double estimateLogP(Rule rule, int sentID) {
 +    // TODO Auto-generated method stub
 +    return 0;
 +  }
 +
 +  @Override
 +  public double getWeight() {
 +    // TODO Auto-generated method stub
 +    return 0;
 +  }
 +}