Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/19 21:33:59 UTC

[11/51] [partial] incubator-joshua git commit: Converted KenLM into a submodule

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/pipeline.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/interpolate/pipeline.cc b/ext/kenlm/lm/interpolate/pipeline.cc
deleted file mode 100644
index 47b8288..0000000
--- a/ext/kenlm/lm/interpolate/pipeline.cc
+++ /dev/null
@@ -1,159 +0,0 @@
-#include "lm/interpolate/pipeline.hh"
-
-#include "lm/common/compare.hh"
-#include "lm/common/print.hh"
-#include "lm/common/renumber.hh"
-#include "lm/vocab.hh"
-#include "lm/interpolate/backoff_reunification.hh"
-#include "lm/interpolate/interpolate_info.hh"
-#include "lm/interpolate/merge_probabilities.hh"
-#include "lm/interpolate/merge_vocab.hh"
-#include "lm/interpolate/normalize.hh"
-#include "lm/interpolate/universal_vocab.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/count_records.hh"
-#include "util/stream/io.hh"
-#include "util/stream/multi_stream.hh"
-#include "util/stream/sort.hh"
-#include "util/fixed_array.hh"
-
-namespace lm { namespace interpolate { namespace {
-
-/* Put the original input files on chains and renumber them */
-void SetupInputs(std::size_t buffer_size, const UniversalVocab &vocab, util::FixedArray<ModelBuffer> &models, bool exclude_highest, util::FixedArray<util::stream::Chains> &chains, util::FixedArray<util::stream::ChainPositions> &positions) {
-  chains.clear();
-  positions.clear();
-  // TODO: much better memory sizing heuristics e.g. not making the chain larger than it will use.
-  util::stream::ChainConfig config(0, 2, buffer_size);
-  for (std::size_t i = 0; i < models.size(); ++i) {
-    chains.push_back(models[i].Order() - exclude_highest);
-    for (std::size_t j = 0; j < models[i].Order() - exclude_highest; ++j) {
-      config.entry_size = sizeof(WordIndex) * (j + 1) + sizeof(float) * 2; // TODO do not include wasteful backoff for highest.
-      chains.back().push_back(config);
-    }
-    models[i].Source(chains.back());
-    for (std::size_t j = 0; j < models[i].Order() - exclude_highest; ++j) {
-      chains[i][j] >> Renumber(vocab.Mapping(i), j + 1);
-    }
-  }
-  for (std::size_t i = 0; i < chains.size(); ++i) {
-    positions.push_back(chains[i]);
-  }
-}
-
-template <class SortOrder> void ApplySort(const util::stream::SortConfig &config, util::stream::Chains &chains) {
-  util::stream::Sorts<SortOrder> sorts(chains.size());
-  for (std::size_t i = 0; i < chains.size(); ++i) {
-    sorts.push_back(chains[i], config, SortOrder(i + 1));
-  }
-  chains.Wait(true);
-  // TODO memory management
-  for (std::size_t i = 0; i < sorts.size(); ++i) {
-    sorts[i].Merge(sorts[i].DefaultLazy());
-  }
-  for (std::size_t i = 0; i < sorts.size(); ++i) {
-    sorts[i].Output(chains[i], sorts[i].DefaultLazy());
-  }
-}
-
-} // namespace
-
-void Pipeline(util::FixedArray<ModelBuffer> &models, const Config &config, int write_file) {
-  // Setup InterpolateInfo and UniversalVocab.
-  InterpolateInfo info;
-  info.lambdas = config.lambdas;
-  std::vector<WordIndex> vocab_sizes;
-
-  util::scoped_fd vocab_null(util::MakeTemp(config.sort.temp_prefix));
-  std::size_t max_order = 0;
-  util::FixedArray<util::scoped_fd> vocab_files(models.size());
-  for (ModelBuffer *i = models.begin(); i != models.end(); ++i) {
-    info.orders.push_back(i->Order());
-    vocab_sizes.push_back(i->Counts()[0]);
-    vocab_files.push_back(util::DupOrThrow(i->VocabFile()));
-    max_order = std::max(max_order, i->Order());
-  }
-  UniversalVocab vocab(vocab_sizes);
-  {
-    ngram::ImmediateWriteWordsWrapper writer(NULL, vocab_null.get(), 0);
-    MergeVocab(vocab_files, vocab, writer);
-  }
-  vocab_files.clear();
-
-  std::cerr << "Merging probabilities." << std::endl;
-  // Pass 1: merge probabilities
-  util::FixedArray<util::stream::Chains> input_chains(models.size());
-  util::FixedArray<util::stream::ChainPositions> models_by_order(models.size());
-  SetupInputs(config.BufferSize(), vocab, models, false, input_chains, models_by_order);
-
-  util::stream::Chains merged_probs(max_order);
-  for (std::size_t i = 0; i < max_order; ++i) {
-    merged_probs.push_back(util::stream::ChainConfig(PartialProbGamma::TotalSize(info, i + 1), 2, config.BufferSize())); // TODO: not buffer_size
-  }
-  MergeProbabilities(info, models_by_order, merged_probs);
-  std::vector<uint64_t> counts(max_order);
-  for (std::size_t i = 0; i < max_order; ++i) {
-    merged_probs[i] >> util::stream::CountRecords(&counts[i]);
-  }
-
-  // Pass 2: normalize.
-  ApplySort<ContextOrder>(config.sort, merged_probs);
-  std::cerr << "Normalizing" << std::endl;
-  SetupInputs(config.BufferSize(), vocab, models, true, input_chains, models_by_order);
-  util::stream::Chains probabilities(max_order), backoffs(max_order - 1);
-  std::size_t block_count = 2;
-  for (std::size_t i = 0; i < max_order; ++i) {
-    // Careful accounting to ensure RewindableStream can fit the entire vocabulary.
-    block_count = std::max<std::size_t>(block_count, 2);
-    // This much needs to fit in RewindableStream.
-    std::size_t fit = NGram<float>::TotalSize(i + 1) * counts[0];
-    // fit / (block_count - 1) rounded up
-    std::size_t min_block = (fit + block_count - 2) / (block_count - 1);
-    std::size_t specify = std::max(config.BufferSize(), min_block * block_count);
-    probabilities.push_back(util::stream::ChainConfig(NGram<float>::TotalSize(i + 1), block_count, specify));
-  }
-  for (std::size_t i = 0; i < max_order - 1; ++i) {
-    backoffs.push_back(util::stream::ChainConfig(sizeof(float), 2, config.BufferSize()));
-  }
-  Normalize(info, models_by_order, merged_probs, probabilities, backoffs);
-
-  util::FixedArray<util::stream::FileBuffer> backoff_buffers(backoffs.size());
-  for (std::size_t i = 0; i < max_order - 1; ++i) {
-    backoff_buffers.push_back(util::MakeTemp(config.sort.temp_prefix));
-    backoffs[i] >> backoff_buffers.back().Sink();
-  }
-
-  // Pass 3: backoffs in the right place.
-  ApplySort<SuffixOrder>(config.sort, probabilities);
-  // TODO destroy universal vocab to save RAM.
-  // TODO these should be freed before merge sort happens in the above function.
-  backoffs.Wait(true);
-  merged_probs.Wait(true);
-  std::cerr << "Reunifying backoffs" << std::endl;
-
-  util::stream::ChainPositions prob_pos(max_order - 1);
-  util::stream::Chains combined(max_order - 1);
-  for (std::size_t i = 0; i < max_order - 1; ++i) {
-    backoffs[i] >> backoff_buffers[i].Source(true);
-    prob_pos.push_back(probabilities[i].Add());
-    combined.push_back(util::stream::ChainConfig(NGram<ProbBackoff>::TotalSize(i + 1), 2, config.BufferSize()));
-  }
-  util::stream::ChainPositions backoff_pos(backoffs);
-
-  ReunifyBackoff(prob_pos, backoff_pos, combined);
-
-  util::stream::ChainPositions output_pos(max_order);
-  for (std::size_t i = 0; i < max_order - 1; ++i) {
-    output_pos.push_back(combined[i].Add());
-  }
-  output_pos.push_back(probabilities.back().Add());
-
-  probabilities >> util::stream::kRecycle;
-  backoffs >> util::stream::kRecycle;
-  combined >> util::stream::kRecycle;
-
-  // TODO genericize to ModelBuffer etc.
-  PrintARPA(vocab_null.get(), write_file, counts).Run(output_pos);
-}
-
-}} // namespaces
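
The pipeline above leans entirely on KenLM's util::stream framework: fixed-size records flow through a Chain, each >> attaches a worker thread, kRecycle returns spent blocks to the pool, and Wait(true) joins the threads and frees their memory. A minimal self-contained sketch of that idiom follows; the Producer and Printer workers are purely illustrative names of ours, not KenLM's:

#include "util/stream/chain.hh"
#include "util/stream/stream.hh"

#include <cstdio>

struct Producer {
  void Run(const util::stream::ChainPosition &position) {
    util::stream::Stream stream(position);
    for (float f = 0.0f; f < 8.0f; f += 1.0f, ++stream)
      *static_cast<float *>(stream.Get()) = f; // write one fixed-size record
    stream.Poison(); // signal end of data to downstream workers
  }
};

struct Printer {
  void Run(const util::stream::ChainPosition &position) {
    for (util::stream::Stream stream(position); stream; ++stream)
      std::printf("%f\n", *static_cast<const float *>(stream.Get()));
  }
};

int main() {
  // Argument order as in SetupInputs above: entry size, block count, total memory.
  util::stream::ChainConfig config(sizeof(float), 2, 1 << 20);
  util::stream::Chain chain(config);
  chain >> Producer() >> Printer() >> util::stream::kRecycle;
  chain.Wait(true); // join the chain's threads
  return 0;
}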

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/pipeline.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/pipeline.hh b/ext/kenlm/lm/interpolate/pipeline.hh
deleted file mode 100644
index b200248..0000000
--- a/ext/kenlm/lm/interpolate/pipeline.hh
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef LM_INTERPOLATE_PIPELINE_H
-#define LM_INTERPOLATE_PIPELINE_H
-
-#include "lm/common/model_buffer.hh"
-#include "util/fixed_array.hh"
-#include "util/stream/config.hh"
-
-#include <cstddef>
-#include <string>
-
-namespace lm { namespace interpolate {
-
-struct Config {
-  std::vector<float> lambdas;
-  util::stream::SortConfig sort;
-  std::size_t BufferSize() const { return sort.buffer_size; }
-};
-
-void Pipeline(util::FixedArray<ModelBuffer> &models, const Config &config, int write_file);
-
-}} // namespaces
-#endif // LM_INTERPOLATE_PIPELINE_H
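
Config ties the per-model weights to the sort configuration shared by all three passes. A hypothetical driver sketch: the file bases model1 and model2 are placeholders for intermediate files written by an earlier phase (one file per order), and we assume util::FixedArray::push_back forwards its argument to the ModelBuffer constructor, as the one-argument push_back calls elsewhere in this commit do:

#include "lm/interpolate/pipeline.hh"
#include "lm/common/model_buffer.hh"
#include "util/fixed_array.hh"

#include <unistd.h>

int main() {
  util::FixedArray<lm::ModelBuffer> models(2);
  models.push_back("model1"); // reads model1.1, model1.2, ...
  models.push_back("model2");

  lm::interpolate::Config config;
  config.lambdas.push_back(0.4f); // weight for model1
  config.lambdas.push_back(0.6f); // weight for model2
  config.sort.temp_prefix = "/tmp/";
  config.sort.buffer_size = 1 << 26;  // 64 MB
  config.sort.total_memory = 1 << 30; // 1 GB

  // Run the three passes and write ARPA output to stdout.
  lm::interpolate::Pipeline(models, config, STDOUT_FILENO);
  return 0;
}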

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/split_worker.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/split_worker.cc b/ext/kenlm/lm/interpolate/split_worker.cc
deleted file mode 100644
index e777bf0..0000000
--- a/ext/kenlm/lm/interpolate/split_worker.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "lm/interpolate/split_worker.hh"
-#include "lm/common/ngram.hh"
-
-namespace lm {
-namespace interpolate {
-
-SplitWorker::SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
-                         util::stream::Chain &sort_chain)
-    : order_(order) {
-  backoff_chain >> backoff_input_;
-  sort_chain >> sort_input_;
-}
-
-void SplitWorker::Run(const util::stream::ChainPosition &position) {
-  // input: ngram record (id, prob, and backoff)
-  // output: a float to the backoff_input stream
-  //         an ngram id and a float to the sort_input stream
-  for (util::stream::Stream stream(position); stream; ++stream) {
-    NGram<ProbBackoff> ngram(stream.Get(), order_);
-
-    // write id and prob to the sort stream
-    float prob = ngram.Value().prob;
-    lm::WordIndex *out = reinterpret_cast<lm::WordIndex *>(sort_input_.Get());
-    for (const lm::WordIndex *it = ngram.begin(); it != ngram.end(); ++it) {
-      *out++ = *it;
-    }
-    *reinterpret_cast<float *>(out) = prob;
-    ++sort_input_;
-
-    // write backoff to the backoff output stream
-    float boff = ngram.Value().backoff;
-    *reinterpret_cast<float *>(backoff_input_.Get()) = boff;
-    ++backoff_input_;
-  }
-  sort_input_.Poison();
-  backoff_input_.Poison();
-}
-
-}
-}
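
Run() above hand-packs each output record. For order n, the record on the sort chain is n WordIndex ids followed by the probability; on typical platforms WordIndex and float are both four bytes, so there is no padding to worry about. A layout sketch (illustrative only, KenLM does not define such a struct):

// One sort-chain record for a trigram (n = 3):
//   lm::WordIndex w[3]; // the ngram ids
//   float prob;         // its probability; 16 bytes total
// The backoff chain carries a bare float per ngram, in the same order.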

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/split_worker.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/split_worker.hh b/ext/kenlm/lm/interpolate/split_worker.hh
deleted file mode 100644
index 15fae68..0000000
--- a/ext/kenlm/lm/interpolate/split_worker.hh
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef KENLM_INTERPOLATE_SPLIT_WORKER_H_
-#define KENLM_INTERPOLATE_SPLIT_WORKER_H_
-
-#include "util/stream/chain.hh"
-#include "util/stream/stream.hh"
-
-namespace lm {
-namespace interpolate {
-
-class SplitWorker {
-  public:
-    /**
-     * Constructs a split worker for a particular order. It writes the
-     * split-off backoff values to the backoff chain and the ngram id and
-     * probability to the sort chain for each ngram in the input.
-     */
-    SplitWorker(std::size_t order, util::stream::Chain &backoff_chain,
-                util::stream::Chain &sort_chain);
-
-    /**
-     * The callback invoked to handle the input from the ngram intermediate
-     * files.
-     */
-    void Run(const util::stream::ChainPosition& position);
-
-  private:
-    /**
-     * The ngram order we are reading/writing for.
-     */
-    std::size_t order_;
-
-    /**
-     * The stream to write to for the backoff values.
-     */
-    util::stream::Stream backoff_input_;
-
-    /**
-     * The stream to write to for the ngram id + probability values.
-     */
-    util::stream::Stream sort_input_;
-};
-}
-}
-#endif
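
A usage sketch (chain setup elided; streaming_example_main.cc below shows the full wiring): construct one SplitWorker per order and attach it to its input chain by reference, so the chain's thread invokes Run() on the worker itself:

lm::interpolate::SplitWorker worker(order, backoff_chain, sort_chain);
ngram_chain >> boost::ref(worker); // boost::ref avoids copying the worker
// ... attach sinks to backoff_chain and sort_chain, then Wait() on all three.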

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/streaming_example_main.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/streaming_example_main.cc b/ext/kenlm/lm/interpolate/streaming_example_main.cc
deleted file mode 100644
index 1f543cb..0000000
--- a/ext/kenlm/lm/interpolate/streaming_example_main.cc
+++ /dev/null
@@ -1,195 +0,0 @@
-#include "lm/common/compare.hh"
-#include "lm/common/model_buffer.hh"
-#include "lm/common/ngram.hh"
-#include "util/stream/chain.hh"
-#include "util/stream/multi_stream.hh"
-#include "util/stream/sort.hh"
-#include "lm/interpolate/split_worker.hh"
-
-#include <boost/program_options.hpp>
-#include <boost/version.hpp>
-
-#if defined(_WIN32) || defined(_WIN64)
-
-// Windows doesn't provide <unistd.h>,
-// so we define the file descriptor constants we need here instead:
-#define STDIN_FILENO 0
-#define STDOUT_FILENO 1
-#else // Huzzah for POSIX!
-#include <unistd.h>
-#endif
-
-/*
- * This is a simple example program that takes in intermediate
- * suffix-sorted ngram files and outputs two sets of files: one for backoff
- * probability values (raw numbers, in suffix order) and one for
- * probability values (ngram id and probability, in *context* order)
- */
-int main(int argc, char *argv[]) {
-  using namespace lm::interpolate;
-
-  const std::size_t ONE_GB = 1 << 30;
-  const std::size_t SIXTY_FOUR_MB = 1 << 26;
-  const std::size_t NUMBER_OF_BLOCKS = 2;
-
-  std::string FILE_NAME = "ngrams";
-  std::string CONTEXT_SORTED_FILENAME = "csorted-ngrams";
-  std::string BACKOFF_FILENAME = "backoffs";
-  std::string TMP_DIR = "/tmp/";
-
-  try {
-    namespace po = boost::program_options;
-    po::options_description options("canhazinterp Pass-3 options");
-
-    options.add_options()
-      ("help,h", po::bool_switch(), "Show this help message")
-      ("ngrams,n", po::value<std::string>(&FILE_NAME), "ngrams file")
-      ("csortngrams,c", po::value<std::string>(&CONTEXT_SORTED_FILENAME), "context sorted ngrams file")
-      ("backoffs,b", po::value<std::string>(&BACKOFF_FILENAME), "backoffs file")
-      ("tmpdir,t", po::value<std::string>(&TMP_DIR), "tmp dir");
-    po::variables_map vm;
-    po::store(po::parse_command_line(argc, argv, options), vm);
-
-    // Display help
-    if(vm["help"].as<bool>()) {
-      std::cerr << "Usage: " << options << std::endl;
-      return 1;
-    }
-  }
-  catch(const std::exception &e) {
-
-    std::cerr << e.what() << std::endl;
-    return 1;
-
-  }
-
-  // The basic strategy here is to have three chains:
-  // - The first reads the ngram order inputs using ModelBuffer. Those are
-  //   then stripped of their backoff values and fed into the third chain;
-  //   the backoff values *themselves* are written to the second chain.
-  //
-  // - The second chain takes the backoff values and writes them out to a
-  //   file (one for each order).
-  //
-  // - The third chain takes just the probability values and ngrams and
-  //   writes them out, sorted in context-order, to a file (one for each
-  //   order).
-
-  // This will be used to read in the binary intermediate files. There is
-  // one file per order (e.g. ngrams.1, ngrams.2, ...)
-  lm::ModelBuffer buffer(FILE_NAME);
-
-  // Create separate chains for each ngram order for:
-  // - Input from the intermediate files
-  // - Output to the backoff file
-  // - Output to the (context-sorted) probability file
-  util::stream::Chains ngram_inputs(buffer.Order());
-  util::stream::Chains backoff_chains(buffer.Order());
-  util::stream::Chains prob_chains(buffer.Order());
-  for (std::size_t i = 0; i < buffer.Order(); ++i) {
-    ngram_inputs.push_back(util::stream::ChainConfig(
-        lm::NGram<lm::ProbBackoff>::TotalSize(i + 1), NUMBER_OF_BLOCKS, ONE_GB));
-
-    backoff_chains.push_back(
-        util::stream::ChainConfig(sizeof(float), NUMBER_OF_BLOCKS, ONE_GB));
-
-    prob_chains.push_back(util::stream::ChainConfig(
-        sizeof(lm::WordIndex) * (i + 1) + sizeof(float), NUMBER_OF_BLOCKS,
-        ONE_GB));
-  }
-
-  // This sets the input for each of the ngram order chains to the
-  // appropriate file
-  buffer.Source(ngram_inputs);
-
-  util::FixedArray<util::scoped_ptr<SplitWorker> > workers(buffer.Order());
-  for (std::size_t i = 0; i < buffer.Order(); ++i) {
-    // Attach a SplitWorker to each of the ngram input chains, writing to the
-    // corresponding order's backoff and probability chains
-    workers.push_back(
-        new SplitWorker(i + 1, backoff_chains[i], prob_chains[i]));
-    ngram_inputs[i] >> boost::ref(*workers.back());
-  }
-
-  util::stream::SortConfig sort_cfg;
-  sort_cfg.temp_prefix = TMP_DIR;
-  sort_cfg.buffer_size = SIXTY_FOUR_MB;
-  sort_cfg.total_memory = ONE_GB;
-
-  // This will parallel merge sort the individual order files, putting
-  // them in context-order instead of suffix-order.
-  //
-  // Two new threads will be running, each owned by the chains[i] object.
-  // - The first executes BlockSorter.Run() to sort the n-gram entries
-  // - The second executes WriteAndRecycle.Run() to write each sorted
-  //   block to disk as a temporary file
-  util::stream::Sorts<lm::ContextOrder> sorts(buffer.Order());
-  for (std::size_t i = 0; i < prob_chains.size(); ++i) {
-    sorts.push_back(prob_chains[i], sort_cfg, lm::ContextOrder(i + 1));
-  }
-
-  // Set the sort output to be on the same chain
-  for (std::size_t i = 0; i < prob_chains.size(); ++i) {
-    // The following call to Chain::Wait()
-    //     joins the threads owned by chains[i].
-    //
-    // As such the following call won't return
-    //     until all threads owned by chains[i] have completed.
-    //
-    // The following call also resets chains[i]
-    //     so that it can be reused
-    //     (including freeing the memory previously used by the chain)
-    prob_chains[i].Wait();
-
-    // In an ideal world (without memory restrictions)
-    //     we could merge all of the previously sorted blocks
-    //     by reading them all completely into memory
-    //     and then running merge sort over them.
-    //
-    // In the real world, we have memory restrictions;
-    //     depending on how many blocks we have,
-    //     and how much memory we can use to read from each block
-    //     (sort_config.buffer_size)
-    //     it may be the case that we have insufficient memory
-    //     to read sort_config.buffer_size of data from each block from disk.
-    //
-    // If this occurs, then it will be necessary to perform one or more rounds
-    // of merge sort on disk;
-    //     doing so will reduce the number of blocks that we will eventually
-    //     need to read from
-    //     when performing the final round of merge sort in memory.
-    //
-    // So, the following call determines whether it is necessary
-    //     to perform one or more rounds of merge sort on disk;
-    //     if such on-disk merge sorting is required, such sorting is performed.
-    //
-    // Finally, the following method launches a thread that calls
-    // OwningMergingReader.Run()
-    //     to perform the final round of merge sort in memory.
-    //
-    // Merge sort could have been invoked directly
-    //     so that merge sort memory doesn't coexist with Chain memory.
-    sorts[i].Output(prob_chains[i]);
-  }
-
-  // Create another model buffer for our output on e.g. csorted-ngrams.1,
-  // csorted-ngrams.2, ...
-  lm::ModelBuffer output_buf(CONTEXT_SORTED_FILENAME, true, false);
-  output_buf.Sink(prob_chains, buffer.Counts());
-
-  // Create a third model buffer for our backoff output on e.g. backoff.1,
-  // backoff.2, ...
-  lm::ModelBuffer boff_buf(BACKOFF_FILENAME, true, false);
-  boff_buf.Sink(backoff_chains, buffer.Counts());
-
-  // Joins all threads that chains owns,
-  //    and does a for loop over each chain object in chains,
-  //    calling chain.Wait() on each such chain object
-  ngram_inputs.Wait(true);
-  backoff_chains.Wait(true);
-  prob_chains.Wait(true);
-
-  return 0;
-}
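
For concreteness, the ChainConfig entry sizes used above work out as follows (a quick check, assuming the usual 4-byte WordIndex and float):

#include "lm/word_index.hh"

#include <cstddef>
#include <cstdio>

int main() {
  for (std::size_t order = 1; order <= 5; ++order) {
    // (ngram id, probability) record as configured for prob_chains.
    std::size_t prob_record = sizeof(lm::WordIndex) * order + sizeof(float);
    std::printf("order %zu: %zu-byte prob record, %zu-byte backoff record\n",
                order, prob_record, sizeof(float));
  }
  return 0;
}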

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm b/ext/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm
deleted file mode 100644
index f4bac3d..0000000
--- a/ext/kenlm/lm/interpolate/toy_data/toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm
+++ /dev/null
@@ -1,23 +0,0 @@
-
-\data\
-ngram 1=5
-ngram 2=8
-
-\1-grams:
--0.5850267	</s>
--99	<s>	-7.004176
--0.7447274	<unk>
--0.4685211	a	-7.272811
--0.6575773	b	-99
-
-\2-grams:
--0.2839967	<s> a
--0.3187588	<s> b
--0.79588	a </s>
--0.5058454	a a
--0.2773661	a b
--0.7447275	b </s>
--0.1135093	b a
--0.9586073	b b
-
-\end\

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm b/ext/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm
deleted file mode 100644
index 9874ba9..0000000
--- a/ext/kenlm/lm/interpolate/toy_data/toy.loglinear_interpolation.lambda1_0.4.lambda2_0.6.lm
+++ /dev/null
@@ -1,23 +0,0 @@
-
-\data\
-ngram 1=5
-ngram 2=8
-
-\1-grams:
--0.6190513	</s>
--99	<s>	-6.885247
--99	<unk>
--0.3394633	a	0
--0.5200813	b	0
-
-\2-grams:
--0.2807584	<s> a
--0.3222944	<s> b
--0.70763	a </s>
--0.6780174	a a
--0.2261675	a b
--0.7991036	b </s>
--0.1237299	b a
--1.050158	b b
-
-\end\

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/toy_data/toy1.lm
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/toy_data/toy1.lm b/ext/kenlm/lm/interpolate/toy_data/toy1.lm
deleted file mode 100644
index fba4263..0000000
--- a/ext/kenlm/lm/interpolate/toy_data/toy1.lm
+++ /dev/null
@@ -1,23 +0,0 @@
-
-\data\
-ngram 1=5
-ngram 2=8
-
-\1-grams:
--1.3010299957	</s>
--99	<s>	-7.446389
--99	<unk>
--0.6020599913	a	0
--0.6020599913	b	0
-
-\2-grams:
--0.15490196	<s> a
--0.5228787453	<s> b
--1	a </s>
--1.5228787453	a a
--0.0604807474	a b
--0.5228787453	b </s>
--0.3010299957	b a
--0.6989700043	b b
-
-\end\

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/toy_data/toy2.lm
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/toy_data/toy2.lm b/ext/kenlm/lm/interpolate/toy_data/toy2.lm
deleted file mode 100644
index 67c2ba1..0000000
--- a/ext/kenlm/lm/interpolate/toy_data/toy2.lm
+++ /dev/null
@@ -1,23 +0,0 @@
-
-\data\
-ngram 1=5
-ngram 2=8
-
-\1-grams:
--0.3979400087	</s>
--99	<s>	-7.446389
--99	<unk>
--0.3979400087	a	0
--0.6989700043	b	0
-
-\2-grams:
--0.3979400087	<s> a
--0.2218487496	<s> b
--0.6989700043	a </s>
--0.3010299957	a a
--0.5228787453	a b
--1	b </s>
--0.0222763947	b a
--1.3010299957	b b
-
-\end\
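
The toy files above are mutually consistent: toy.linear_interpolation.lambda1_0.4.lambda2_0.6.lm is the 0.4/0.6 mixture of toy1.lm and toy2.lm in probability space. For the bigram "a </s>", toy1 gives log10 P = -1 (P = 0.1) and toy2 gives -0.6989700043 (P = 0.2), so the mixture is 0.4 * 0.1 + 0.6 * 0.2 = 0.16, and log10(0.16) = -0.79588, matching the entry in the interpolated file. The log-linear file cannot be checked by the same one-liner: the weighted sum in log space (0.4 * -1 + 0.6 * -0.6989700043 = -0.8194) is unnormalized, and the stored entries fold in the per-context normalization constant.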

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/train_params_grant_main.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/train_params_grant_main.cc b/ext/kenlm/lm/interpolate/train_params_grant_main.cc
deleted file mode 100644
index a106cac..0000000
--- a/ext/kenlm/lm/interpolate/train_params_grant_main.cc
+++ /dev/null
@@ -1,561 +0,0 @@
-#include "lm/ngram_query.hh"
-#include "lm/model.hh"
-#include "lm/word_index.hh"
-#include "lm/interpolate/enumerate_global_vocab.hh"
-
-#include <string>
-#include <vector>
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <iomanip>
-
-#include <boost/program_options.hpp>
-#include <boost/version.hpp>
-#include <boost/foreach.hpp>
-
-#include "util/fixed_array.hh"
-
-#include <Eigen/Eigen>
-#include <Eigen/Dense>
-
-// typedef Eigen::MatrixXf FMatrix;
-// typedef Eigen::VectorXf FVector;
-typedef Eigen::MatrixXd DMatrix;
-typedef Eigen::VectorXd DVector;
-
-bool HAS_BIAS = true;
-
-using namespace lm::ngram;
-using namespace lm;
-
-inline double logProb(Model *model, const std::vector<std::string> &ctx,
-                      WordIndex word_idx) {
-  // Horribly inefficient
-  const Vocabulary &vocab = model->GetVocabulary();
-
-  State nextState;  // throwaway
-
-  WordIndex context_idx[ctx.size()];
-
-  // reverse context
-  for (std::size_t i = 0; i < ctx.size(); i++) {
-    context_idx[ctx.size() - 1 - i] = vocab.Index(ctx[i]);
-  }
-
-  // The context range [context_rbegin, context_rend) is half-open, so pass
-  // one past the last element.
-  FullScoreReturn score = model->FullScoreForgotState(
-      context_idx, &(context_idx[ctx.size()]), word_idx, nextState);
-
-  double ret = score.prob;
-  // std::cerr << "w: " << word << " p: " << ret << std::endl;
-  return ret;
-}
-
-inline double logProb(Model *model, double unkprob,
-                      const std::vector<std::string> &ctx,
-                      const std::string &word) {
-  // Horribly inefficient
-  const Vocabulary &vocab = model->GetVocabulary();
-
-  WordIndex word_idx = vocab.Index(word);
-  if (word_idx == lm::kUNK) return unkprob;
-
-  return logProb(model, ctx, word_idx);
-}
-
-void set_features(const std::vector<std::string> &ctx, const std::string &word,
-                  const std::vector<Model *> &models,
-                  const std::vector<double> &unkprobs, DVector &v) {
-  if (HAS_BIAS) {
-    v(0) = 1;
-    for (std::size_t i = 0; i < models.size(); ++i)
-      v(i + 1) = logProb(models[i], unkprobs[i], ctx, word);
-  } else {
-    for (std::size_t i = 0; i < models.size(); ++i)
-      v(i) = logProb(models[i], unkprobs[i], ctx, word);
-  }
-}
-
-void train_params(const std::vector<std::vector<std::string> > &corpus,
-                  const std::vector<std::string> &vocab,
-                  const std::vector<Model *> &models) {
-  using namespace std;
-
-  // A safeguarded Newton's method to find optimum parameters.
-  // Reverts to steepest-descent linesearch if Newton step does not improve
-  // objective.
-  //
-  // Two Boolean variables below, "AllowExtrapolation" and
-  // "AllowNegativeParams", control the feasible set.
-
-  bool AllowExtrapolation = true;   // if true, params need not sum to one
-  bool AllowNegativeParams = true;  // if true, params can be negative
-  const int ITERATIONS = 20;
-  double minstepsize = 1.0e-9;  // convergence criterion
-  int context_size = 5;  // (context_size+1)-grams considered in perplexity
-  double stepdecreasefactor = 0.1;        // if step unsuccessful
-  double initstepsize = 1.0;              // Initial step size
-  std::size_t linesinstartercorpus = 12;  // The first few lines are tuned
-                                          // first, to find basin of attraction
-                                          // for Newton
-  // bias + #models
-  const std::size_t nlambdas = models.size() + (HAS_BIAS ? 1 : 0);
-
-  // initialize to sum to 1
-  DVector params = DVector::Constant(nlambdas, 1.0 / nlambdas);
-  DMatrix N = DMatrix::Constant(
-      nlambdas, nlambdas - 1,
-      -1.0 / sqrt((nlambdas - 1) * (nlambdas - 1) + nlambdas - 1.0));
-  for (unsigned i = 0; i < nlambdas - 1; ++i)
-    N(i, i) = N(i, i) * (1.0 - nlambdas);
-  // N is nullspace matrix, each column sums to zero
-
-  cerr << setprecision(16) << "++ Parameter training ++" << endl;
-  if (AllowExtrapolation)
-    cerr << " Allowing extrapolation (sharpening and flattening of individual "
-            "LM distributions)" << endl;
-  else
-    cerr << " Interpolating only (not sharpening or flattening individual LM "
-            "distributions)" << endl;
-  if (AllowNegativeParams)
-    cerr << " Allowing negative parameters\n"
-         << "  (more general but slow and rarely useful\n"
-         << "   -LM with negative weight has probability rankings reversed and "
-            "is weighted more highly than all LMs with positive weights)"
-         << endl;
-  else
-    cerr << "Not allowing negative parameters (mild assumption, and faster)"
-         << endl;
-  cerr << " Maximum number of iterations: " << ITERATIONS << endl;
-  cerr << " Minimum step size: " << minstepsize << endl;
-  cerr << " Perplexity computed with " << context_size + 1 << "-grams" << endl;
-
-  if ((!AllowExtrapolation) && (nlambdas == 1)) {
-    // Nothing to optimize. One parameter, and it sums to one.
-    cerr << "Training complete. Best weights:" << endl;
-    cerr << setprecision(16) << 1.0 << endl;
-    return;
-  }
-
-  // Smart initialization of full tuning by tuning on smaller set first
-  vector<std::size_t> linestotune;
-  if (linesinstartercorpus < corpus.size())
-    linestotune.push_back(linesinstartercorpus);
-  linestotune.push_back(corpus.size());
-
-  for (std::size_t setiter = 0; setiter < linestotune.size(); ++setiter) {
-    cerr << " Now tuning the first " << linestotune[setiter] << " lines"
-         << endl;
-
-    vector<DVector> paramhistory;
-    double bestppl = 0.0;                          // best recorded ppl
-    DVector bestgrad = DVector::Zero(nlambdas);    // corresp. gradient,
-                                                   // feasible direction
-    DVector bestparams = DVector::Zero(nlambdas);  // corresp. weights
-    double maxbestgradstep = 0.0;    // max feasible step in grad. direction
-    double stepsize = initstepsize;  // Initial step size
-
-    for (int iter = 0; iter < ITERATIONS; ++iter) {  // iterations
-      cerr << "ITERATION " << iter + 1 << " (of max " << ITERATIONS
-           << "), step size " << stepsize << " (of min " << minstepsize
-           << "), weights: " << endl;
-      cerr << params << endl;
-
-      paramhistory.push_back(params);
-      // Perplexity is computed with (context_size + 1)-grams
-      vector<string> context(context_size, "<s>");
-      double ppl = 0.0;
-      DVector grad = DVector::Zero(nlambdas);
-      DMatrix H = DMatrix::Zero(nlambdas, nlambdas);
-      cerr << "o";
-
-      std::vector<double> unkprobs(models.size());
-      // for each sentence in tuning corpus
-      for (std::size_t ci = 0; ci < linestotune[setiter]; ++ci) {
-        const vector<string> &sentence = corpus[ci];
-        // pad our beginning context
-        std::fill(context.begin(), context.end(), "<s>");
-
-        // for each word in sentence
-        for (std::size_t t = 0; t < sentence.size(); ++t) {
-          // fill in unk probabilities for this context, to avoid having to
-          // look them up redundantly later
-          for (std::size_t mi = 0; mi < models.size(); ++mi) {
-            unkprobs[mi] = logProb(models[mi], context, lm::kUNK);
-          }
-
-          DVector feats = DVector::Zero(nlambdas);
-          // probs for actual n-gram
-          set_features(context, sentence[t], models, unkprobs, feats);
-
-          double z = 0.0;
-          double maxlogprob = 0.0;  // Allows us to avoid overflow with
-                                    // negative params
-          DVector expectfeats = DVector::Zero(nlambdas);
-          DMatrix expectfeatmatrix = DMatrix::Zero(nlambdas, nlambdas);
-          // Logically, this should be in the loop's scope
-          DVector iterfeats = DVector::Zero(nlambdas);
-
-          // probs over possible n-grams, for normalization
-          for (std::size_t i = 0; i < vocab.size(); ++i) {
-            set_features(context, vocab[i], models, unkprobs, iterfeats);
-            double logprob = params.dot(iterfeats);
-            if (i == 0) {
-              // maxlogprob=logprob;// more precise, less underflow
-              maxlogprob = 0.0;  // reduces number of updates
-            } else if (logprob > maxlogprob) {
-              // Adjust all old values to new scaling
-              double adjust = exp(maxlogprob - logprob);
-              z *= adjust;
-              expectfeats *= adjust;
-              expectfeatmatrix *= adjust;
-              maxlogprob = logprob;
-            }
-            double us = exp(params.dot(iterfeats) - maxlogprob);  // measure
-
-            z += us;
-            expectfeats += us * iterfeats;
-            expectfeatmatrix += us * (iterfeats * iterfeats.transpose());
-          }
-          expectfeats /= z;       // Expectation
-          expectfeatmatrix /= z;  // Expectation
-
-          // Add sentence[t] to the end of the context
-          context[0] = sentence[t];
-          std::rotate(context.begin(), context.begin() + 1, context.end());
-
-          // Perplexity (actually log(perplexity))
-          ppl += params.dot(feats) - log(z);
-          // Gradient
-          grad += feats - expectfeats;
-          // Hessian
-          H += -expectfeatmatrix + expectfeats * expectfeats.transpose();
-        }
-        cerr << ".";
-      }
-      ppl *= -1.0 / corpus.size();
-      // The gradient and Hessian coefficients cancel out, so we don't really
-      // need to do this, but it's fast.
-      grad *= -1.0 / corpus.size();
-      H *= -1.0 / corpus.size();
-      cerr << " log(PPL)=" << ppl << " PPL=" << exp(ppl) << endl;
-
-      // Use results to determine next params to evaluate
-      if ((ppl < bestppl) || (iter == 0)) {
-        // Found a new best
-        bestppl = ppl;
-        bestparams = params;
-        double beststepsize = stepsize;
-        if (iter > 0)
-          cerr << " New best point found, step size " << beststepsize << endl;
-        else
-          cerr << " New best point found" << endl;
-
-        bestgrad = grad;
-        DVector deltaparams = DVector::Zero(nlambdas);
-
-        bool reverttograd = false;
-
-        {
-          double gradnorm = 0.0;
-          double solvenorm = 0.0;
-          double errnorm = 0.0;
-          // Find Newton step
-          if (AllowExtrapolation) {
-            deltaparams = -H.colPivHouseholderQr().solve(grad);
-            Eigen::SelfAdjointEigenSolver<DMatrix> eigensolver(H);
-            cerr << "Eigenvalues (negative values should be negligible):\n"
-                 << eigensolver.eigenvalues() << endl;
-            gradnorm = grad.norm();
-            solvenorm = (H * deltaparams).norm();
-            errnorm = (grad + H * deltaparams).norm();
-          } else {
-            // Project gradient to interpolation space
-            bestgrad = N * N.transpose() * bestgrad;
-
-            // need to work in nullspace to maintain unit sum
-            DMatrix Hnull = DMatrix::Zero(nlambdas - 1, nlambdas - 1);
-
-            // Looks like we don't need the three lines below -- we can do it
-            // in-line (if we don't want eigenvalues)
-            Hnull = N.transpose() * H * N;
-            Eigen::SelfAdjointEigenSolver<DMatrix> eigensolver(Hnull);
-            cerr << "Eigenvalues (best if all positive):\n"
-                 << eigensolver.eigenvalues() << endl;
-            deltaparams =
-                -N * Hnull.fullPivHouseholderQr().solve(N.transpose() * grad);
-            gradnorm = (N.transpose() * grad).norm();
-            solvenorm = (Hnull * deltaparams).norm();
-            errnorm = (N.transpose() * grad + Hnull * deltaparams).norm();
-          }
-          // eventually, params = bestparams + deltaparams;
-          cerr << " Error norm " << errnorm << ", gradient norm " << gradnorm
-               << ", solution norm " << solvenorm << endl;
-          // Check for numerical errors. Don't trust Newton step if they are too
-          // big.
-          if (errnorm < 1e-12 * std::max(1.0, std::min(gradnorm, solvenorm))) {
-            stepsize = 0.0;
-            for (std::size_t i = 0; i < nlambdas; i++)
-              stepsize += deltaparams(i) * deltaparams(i);
-            stepsize = sqrt(stepsize);  // holds length of Newton step
-            cerr << "Newton step, length " << stepsize << ": " << endl;
-            cerr << deltaparams << endl;
-
-            // Don't let the Newton step get much bigger than last successful
-            // step (likely would have to shrink later, anyway)
-            if (stepsize > 2.0 * beststepsize) {
-              stepsize = 1.5 * beststepsize;
-              reverttograd = true;
-              cerr << "Reverting to gradient, because Newton step is too large."
-                   << endl;
-            }
-          } else {
-            stepsize = 1.5 * beststepsize;
-            reverttograd = true;
-            cerr << "Reverting to gradient, because Newton step computation "
-                    "unsuccessful." << endl;
-          }
-          // Make the gradient unit norm, in feasible search direction.
-          if (!AllowNegativeParams) {
-            // Project gradient to be a feasible search direction
-            vector<bool> active(nlambdas, false);
-            std::size_t numactive = 0;
-            for (std::size_t i = 0; i < nlambdas; i++) {
-              // Project gradient to inactive constraints
-              if ((bestparams(i) == 0) && (bestgrad(i) > 0)) {
-                active[i] = true;
-                bestgrad(i) = 0.0;  // Do this now, in case extrapolation
-                                    // allowed.
-                ++numactive;
-              }
-            }
-            if (numactive > 0) {
-              if (!AllowExtrapolation) {
-                // Project gradient, for activity concerns
-                DMatrix tmpN = DMatrix::Constant(
-                    nlambdas, nlambdas - 1,
-                    -1.0 / sqrt((nlambdas - numactive - 1) *
-                                    (nlambdas - numactive - 1) +
-                                nlambdas - numactive - 1.0));
-
-                for (std::size_t i = 0; i < nlambdas - 1; ++i)
-                  tmpN(i, i) = tmpN(i, i) * (1.0 - (nlambdas - numactive));
-
-                for (std::size_t i = 0; i < nlambdas; ++i) {
-                  if (active[i]) {
-                    for (std::size_t j = 0; j < nlambdas - 1; ++j) {
-                      tmpN(i, j) = 0;
-                    }
-                  }
-                }
-
-                // projected gradient onto unit sum and active set constraints
-                bestgrad = -tmpN * tmpN.transpose() * bestgrad;
-              }
-            }
-          }
-        }
-        double norm = 0.0;
-        for (std::size_t i = 0; i < nlambdas; i++)
-          norm += bestgrad(i) * bestgrad(i);
-        if (norm != 0) {
-          bestgrad /= sqrt(norm);
-        } else {
-          cerr << " Gradient is zero. Exiting.";
-          break;
-        }
-        cerr << "Gradient, unit length: " << endl;
-        cerr << bestgrad << endl;
-
-        // Find max step in gradient direction that remains feasible.
-        if (!AllowNegativeParams) {
-          double limitfraction = 0.5;  // Not 1: If Newton step is bad, probably
-                                       // will need to reduce later anyway
-          for (std::size_t i = 0; i < nlambdas; i++) {
-            if (bestparams(i) - maxbestgradstep * bestgrad(i) < 0) {
-              double tmplimitfraction =
-                  bestparams(i) / (bestgrad(i) * maxbestgradstep);
-              if (tmplimitfraction < limitfraction)
-                limitfraction = tmplimitfraction;
-            }
-          }
-          maxbestgradstep = stepsize * limitfraction;
-          cerr << " Max grad step: " << maxbestgradstep << endl;
-        } else {
-          maxbestgradstep = stepsize;
-        }
-
-        if (!reverttograd) {
-          if (!AllowNegativeParams) {
-            for (std::size_t i = 0; i < nlambdas; i++) {
-              if (bestparams(i) + deltaparams(i) < 0) {
-                // Can't do Newton step. Revert to descent.
-                reverttograd = true;
-              }
-            }
-          }
-          if (reverttograd) {
-            cerr << "Reverting to gradient, since Newton step infeasible:"
-                 << endl;
-          }
-        }
-
-        if (reverttograd) {
-          stepsize = maxbestgradstep;
-          deltaparams = -bestgrad * stepsize;
-        }
-
-        params = bestparams + deltaparams;
-        cerr << "Change in weights from best, step size " << stepsize << ": "
-             << endl;
-        cerr << deltaparams << endl;
-      } else {
-        // Last attempt failed at being better, so move in gradient direction
-        // with reduced step.
-        // stepsize reduction factor is empirical
-        stepsize = std::min(stepdecreasefactor * stepsize, maxbestgradstep);
-        cerr << "Taking smaller step: " << stepsize << endl;
-        params = bestparams - bestgrad * stepsize;
-      }
-      // Clean the parameters up.
-      double sumparams = 0.0;
-      for (std::size_t i = 0; i < nlambdas; i++) {
-        if (!AllowNegativeParams) {
-          if (params(i) < 1e-12) {
-            // snap to zero, for active set and duplicate weights
-            params(i) = 0;
-          }
-        }
-        sumparams += params(i);
-      }
-      if (!AllowExtrapolation) params /= sumparams;
-
-      bool duplicateentry = false;
-      for (std::size_t i = 0; i < paramhistory.size(); ++i) {
-        if (params == paramhistory[i]) duplicateentry = true;
-      }
-      while ((duplicateentry) && (stepsize >= minstepsize)) {
-        cerr << "Duplicate weight found: " << endl;
-        cerr << params << endl;
-        stepsize *= 0.5;  // Step in this direction is duplicate, so try again
-                          // with smaller step
-        params = bestparams - stepsize * bestgrad;
-
-        sumparams = 0.0;
-        for (std::size_t i = 0; i < nlambdas; i++) {
-          if (!AllowNegativeParams) {
-            if (params(i) < 1e-12) params(i) = 0;
-          }
-          sumparams += params(i);
-        }
-        if (!AllowExtrapolation) params /= sumparams;
-
-        duplicateentry = false;
-        for (std::size_t i = 0; i < paramhistory.size(); ++i) {
-          if (params == paramhistory[i]) duplicateentry = true;
-        }
-      }
-      if (stepsize < minstepsize) break;  // No need to make another step
-    }
-
-    params = bestparams;  // So that next setiter is correct
-    cerr << "Training complete. Best weights:" << endl;
-    cerr << params << endl;
-  }
-}
-
-int main(int argc, char **argv) {
-  std::string tuning_data;
-  std::vector<std::string> lms;
-
-  try {
-    namespace po = boost::program_options;
-    po::options_description options("train-params");
-
-    options.add_options()("help,h", po::bool_switch(),
-                          "Show this help message")(
-        "no_bias_term,B", po::bool_switch(), "Do not include a 'bias' feature")(
-        "tuning_data,t", po::value<std::string>(&tuning_data),
-        "File to tune perplexity on")(
-        "model,m", po::value<std::vector<std::string> >(&lms),
-        "Language models in KenLM format to interpolate");
-    po::variables_map vm;
-    po::store(po::parse_command_line(argc, argv, options), vm);
-
-    // Display help
-    if (argc == 1 || vm["help"].as<bool>()) {
-      std::cerr << options << std::endl;
-      return 1;
-    }
-    if (vm["no_bias_term"].as<bool>()) HAS_BIAS = false;
-    lms = vm["model"].as<std::vector<std::string> >();
-    tuning_data = vm["tuning_data"].as<std::string>();
-  } catch (const std::exception &e) {
-    std::cerr << e.what() << std::endl;
-    return 1;
-  }
-  if (lms.size() < 2) {
-    std::cerr
-        << "Please specify at least two language model files with -m LM.KLM\n";
-    return 1;
-  }
-  if (tuning_data.empty()) {
-    std::cerr << "Please specify tuning set with -t FILE.TXT\n";
-    return 1;
-  }
-
-  // Growable vocab here
-  // GrowableVocab gvoc(100000); //dummy default
-
-  // no comment
-  std::map<std::string, int *> vmap;
-
-  // stuff it into the config below
-  EnumerateGlobalVocab *globalVocabBuilder =
-      new EnumerateGlobalVocab(&vmap, lms.size());
-
-  Config cfg;
-  cfg.enumerate_vocab = (EnumerateVocab *)globalVocabBuilder;
-
-  // load models
-  std::vector<Model *> models;
-  for (std::size_t i = 0; i < lms.size(); i++) {
-    std::cerr << "Loading LM file: " << lms[i] << std::endl;
-
-    // haaaack
-    globalVocabBuilder->SetCurModel(i);  // yes this is dumb
-
-    Model *this_model = new Model(lms[i].c_str(), cfg);
-    models.push_back(this_model);
-  }
-
-  // assemble vocabulary vector
-  std::vector<std::string> vocab;
-  std::cerr << "Global Vocab Map has size: " << vmap.size() << std::endl;
-
-  for (std::map<std::string, int *>::iterator iter = vmap.begin();
-       iter != vmap.end(); ++iter) {
-    vocab.push_back(iter->first);
-  }
-  std::cerr << "Vocab vector has size: " << vocab.size() << std::endl;
-
-  // load context sorted ngrams into vector of vectors
-  std::vector<std::vector<std::string> > corpus;
-
-  std::cerr << "Loading context-sorted ngrams: " << tuning_data << std::endl;
-  std::ifstream infile(tuning_data.c_str());
-
-  for (std::string line; std::getline(infile, line);) {
-    std::vector<std::string> words;
-    std::stringstream stream(line);
-    std::string word;
-
-    while (stream >> word)
-      words.push_back(word);
-    corpus.push_back(words);
-  }
-
-  train_params(corpus, vocab, models);
-
-  return 0;
-}
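
The quantities accumulated in train_params's inner loop have standard closed forms. Writing f(w, h) for the feature vector (bias plus per-model log probabilities of word w in context h) and \lambda for the weight vector, our reconstruction from the code above is:

\[
\ln p_\lambda(w \mid h) = \lambda^\top f(w,h) - \ln Z(h),
\qquad Z(h) = \sum_{w'} e^{\lambda^\top f(w',h)}
\]
\[
\nabla_\lambda \ln p_\lambda(w \mid h) = f(w,h) - \mathbb{E}_{p_\lambda}[f],
\qquad
\nabla_\lambda^2 \ln p_\lambda(w \mid h) = -\left(\mathbb{E}_{p_\lambda}[f f^\top] - \mathbb{E}_{p_\lambda}[f]\,\mathbb{E}_{p_\lambda}[f]^\top\right)
\]

These are exactly the loop updates grad += feats - expectfeats and H += -expectfeatmatrix + expectfeats * expectfeats.transpose(). Both are then negated and averaged over the corpus, so H is the (positive semidefinite) average feature covariance, and the Newton step solves H * deltaparams = -grad, which is the colPivHouseholderQr call above.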

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/train_params_main.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/train_params_main.cc b/ext/kenlm/lm/interpolate/train_params_main.cc
deleted file mode 100644
index 39233e8..0000000
--- a/ext/kenlm/lm/interpolate/train_params_main.cc
+++ /dev/null
@@ -1,349 +0,0 @@
-#include "lm/ngram_query.hh"
-#include "lm/model.hh"
-#include "lm/word_index.hh"
-#include "lm/interpolate/enumerate_global_vocab.hh"
-
-
-#include <string>
-#include <vector>
-#include <iostream>
-#include <fstream>
-#include <map>
-
-#include <boost/program_options.hpp>
-#include <boost/version.hpp>
-#include <boost/foreach.hpp>
-
-#include "util/fixed_array.hh"
-
-#include <Eigen/Eigen>
-
-typedef Eigen::MatrixXf FMatrix;
-typedef Eigen::VectorXf FVector;
-
-bool HAS_BIAS = true;
-
-using namespace lm::ngram;
-using namespace lm;
-
-inline float logProb(Model * model, const std::vector<std::string>& ctx, const std::string& word) {
-
-  // Horribly inefficient
-  const Vocabulary &vocab = model->GetVocabulary();
-
-  State nextState; //throwaway
-
-  WordIndex word_idx = vocab.Index(word);
-  WordIndex context_idx[ctx.size()];
-
-  //reverse context
-  for(unsigned int i = 0; i < ctx.size(); i++) {
-    context_idx[ctx.size() - 1 - i] = vocab.Index(ctx[i]);
-  }
-
-  // [context_rbegin, context_rend) is half-open: pass one past the last element.
-  FullScoreReturn score = model->FullScoreForgotState(context_idx, &(context_idx[ctx.size()]), word_idx, nextState);
-
-  float ret = score.prob;
-  //std::cerr << "w: " << word << " p: " << ret << std::endl;
-  return ret;
-}
-
-void set_features(const std::vector<std::string>& ctx,
-                  const std::string& word,
-                  const std::vector<Model *>& models,
-                  FVector& v) {
-
-  //std::cerr << "setting feats for " << word << std::endl;
-
-  if (HAS_BIAS) {
-    v(0) = 1;
-    for (unsigned i=0; i < models.size(); ++i)
-      v(i + 1) = logProb(models[i], ctx, word);
-  } else {
-    for (unsigned i=0; i < models.size(); ++i)
-      v(i) = logProb(models[i], ctx, word);
-  }
-}
-
-void translate_input(
-    const std::vector<std::vector<std::string> >& corpus,
-    const std::vector<std::string>& gvocab,
-    const std::vector<Model *>& models,
-    std::vector<std::vector<std::vector<WordIndex> > >&translated_corpus,
-    std::vector<std::vector<WordIndex> >&translated_vocab
-  ) {
-  translated_corpus.resize(models.size());
-  translated_vocab.resize(models.size());
-  for (unsigned mn=0; mn < models.size(); ++mn) { // models 
-
-    const Vocabulary &vocab = models[mn]->GetVocabulary();
-
-    for (unsigned i = 0; i < gvocab.size(); ++i) {
-      translated_vocab[mn].push_back(vocab.Index(gvocab[i]));
-    }
-    
-    translated_corpus[mn].resize(corpus.size());
-    for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
-      const std::vector<std::string>& sentence = corpus[ci];
-      for (int t = sentence.size() -1; t >= 0; --t) { // words in sentence
-        translated_corpus[mn][ci].push_back(vocab.Index(sentence[t]));
-      }
-      for (int i=0; i<5; ++i) {
-        translated_corpus[mn][ci].push_back(vocab.Index("<s>"));
-      }
-    }
-  }
-}
-      
-
-void train_params_fast(
-    const std::vector<std::vector<std::string> >& corpus,
-    const std::vector<std::string>& vocab,
-    const std::vector<Model *>& models) {
-  using namespace std;
-
-  // model / sentence / words in sentence in reverse order with <s> padding 
-  std::vector<std::vector<std::vector<WordIndex> > > t_corpus;
-  std::vector<std::vector<WordIndex> > t_vocab;
-  translate_input(corpus, vocab, models, t_corpus, t_vocab);
-
-
-
-  const int ITERATIONS = 10;
-  const int nlambdas = models.size() + (HAS_BIAS ? 1 : 0); // bias + #models
-  FVector params = FVector::Zero(nlambdas);
-  vector<FVector> feats(vocab.size(), params);
-  vector<float> us(vocab.size(), 0);
-  vector<float> ps(vocab.size(), 0);
-  FVector grad = FVector::Zero(nlambdas);
-  FMatrix H = FMatrix::Zero(nlambdas, nlambdas);
-  FVector ef = FVector::Zero(nlambdas);
-  for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
-    grad.setZero();
-    H.setZero();
-    double loss = 0;
-    unsigned numchars = 0;
-    for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
-      const vector<string>& sentence = corpus[ci];
-      double z = 0;
-      for (int t = sentence.size() -1 ; t >=0; --t) { // words in sentence
-        ++numchars;
-        int ref_word = 0; // TODO: vocab index of the reference word
-        for (unsigned i = 0; i < vocab.size(); ++i) { // vocab
-          // set_features(context, vocab[i], models, feats[i]);
-          for (unsigned j=0; j < models.size(); ++j) {
-            // NOTE: reference ---- WordIndex word_idx = t_corpus[j][ci][t];
-            WordIndex word_idx = t_vocab[j][i];
-            State nextState; //throwaway
-            FullScoreReturn score = models[j]->FullScoreForgotState(&(t_corpus[j][ci][t]), &(t_corpus[j][ci][t+5]), word_idx, nextState);
-            feats[i](j) = score.prob;
-            // feats[i](j) = logProb(models[j], ctx, word);
-          }
-          
-          us[i] = params.dot(feats[i]);
-          z += exp(double(us[i]));
-        }
-	//std::cerr << "there..." << std::endl;
-        const float logz = log(z);
-
-        // expected feature values
-        ef.setZero();
-        for (unsigned i = 0; i < vocab.size(); ++i) {
-          ps[i] = expf(us[i] - logz);
-          ef += ps[i] * feats[i];
-        }
-        loss -= log(ps[ref_word]);
-        const FVector& reffeats = feats[ref_word];
-        grad += ef - reffeats;
-
-        // Hessian
-        for (unsigned i = 0; i < vocab.size(); ++i)
-          H.noalias() += ps[i] * feats[i] * feats[i].transpose() -
-                         ps[i] * feats[i] * ef.transpose();
-
-        // this should just be the state for each model
-      }
-      cerr << ".";
-    }
-    cerr << "ITERATION " << (iter + 1) << ": PPL=" << exp(loss / numchars) << endl;
-    params = H.colPivHouseholderQr().solve(grad);
-    cerr << params << endl;
-  }
-}
-          
-  
-
-
-//const util::FixedArray<Model *>& models)
-void train_params(
-    const std::vector<std::vector<std::string> >& corpus,
-    const std::vector<std::string>& vocab,
-    const std::vector<Model *>& models) {
-  using namespace std;
-
-  vector<string> context(5, "<s>");
-  const int ITERATIONS = 10;
-  const int nlambdas = models.size() + (HAS_BIAS ? 1 : 0); // bias + #models
-  FVector params = FVector::Zero(nlambdas);
-  vector<FVector> feats(vocab.size(), params);
-  vector<float> us(vocab.size(), 0);
-  vector<float> ps(vocab.size(), 0);
-  FVector grad = FVector::Zero(nlambdas);
-  FMatrix H = FMatrix::Zero(nlambdas, nlambdas);
-  FVector ef = FVector::Zero(nlambdas);
-  for (int iter = 0; iter < ITERATIONS; ++iter) { // iterations
-    grad.setZero();
-    H.setZero();
-    double loss = 0;
-    unsigned numchars = 0;
-    for (unsigned ci = 0; ci < corpus.size(); ++ci) { // sentences in tuning corpus
-      const vector<string>& sentence = corpus[ci];
-      std::fill(context.begin(), context.end(), "<s>");
-      for (unsigned t = 0; t < sentence.size(); ++t) { // words in sentence
-        ++numchars;
-        const string& ref_word_string = sentence[t];
-        int ref_word = 0; // TODO
-        double z = 0;
-	//std::cerr << "here..." << std::endl;
-        for (unsigned i = 0; i < vocab.size(); ++i) { // vocab
-          set_features(context, vocab[i], models, feats[i]);
-          us[i] = params.dot(feats[i]);
-          z += exp(double(us[i]));
-        }
-	//std::cerr << "there..." << std::endl;
-        // Slide the fixed-length context window forward one word.
-        context.erase(context.begin());
-        context.push_back(ref_word_string);
-        const float logz = log(z);
-
-        // expected feature values
-        ef.setZero();
-        for (unsigned i = 0; i < vocab.size(); ++i) {
-          ps[i] = expf(us[i] - logz);
-          ef += ps[i] * feats[i];
-        }
-        loss -= log(ps[ref_word]);
-        const FVector& reffeats = feats[ref_word];
-        grad += ef - reffeats;
-
-        // Hessian
-        for (unsigned i = 0; i < vocab.size(); ++i)
-          H.noalias() += ps[i] * feats[i] * feats[i].transpose() -
-                         ps[i] * feats[i] * ef.transpose();
-
-        // this should just be the state for each model
-      }
-      cerr << ".";
-    }
-    cerr << "ITERATION " << (iter + 1) << ": PPL=" << exp(loss / numchars) << endl;
-    params = H.colPivHouseholderQr().solve(grad);
-    cerr << params << endl;
-  }
-}
-
-int main(int argc, char** argv) {
-
-  std::string tuning_data;
-  std::vector<std::string> lms;
-
-  try {
-    namespace po = boost::program_options;
-    po::options_description options("train-params");
-
-    options.add_options()
-      ("help,h", po::bool_switch(), "Show this help message")
-      ("no_bias_term,B", po::bool_switch(), "Do not include a 'bias' feature")
-      ("tuning_data,t", po::value<std::string>(&tuning_data), "File to tune perplexity on")
-      ("model,m", po::value<std::vector<std::string> >(&lms), "Language models in KenLM format to interpolate");
-    po::variables_map vm;
-    po::store(po::parse_command_line(argc, argv, options), vm);
-
-    // Display help
-    if(argc == 1 || vm["help"].as<bool>()) {
-      std::cerr << options << std::endl;
-      return 1;
-    }
-    if (vm["no_bias_term"].as<bool>())
-      HAS_BIAS = false;
-    lms = vm["model"].as<std::vector<std::string> >();
-    tuning_data = vm["tuning_data"].as<std::string>();
-  }
-  catch(const std::exception &e) {
-
-    std::cerr << e.what() << std::endl;
-    return 1;
-
-  }
-  if (lms.size() < 2) {
-    std::cerr << "Please specify at least two language model files with -m LM.KLM\n";
-    return 1;
-  }
-  if (tuning_data.empty()) {
-    std::cerr << "Please specify tuning set with -t FILE.TXT\n";
-    return 1;
-  }
-
-  // Build a global vocabulary map shared across all models.
-  std::map<std::string, int*> vmap;
-
-  EnumerateGlobalVocab * globalVocabBuilder = new EnumerateGlobalVocab(&vmap, lms.size());
-
-  Config cfg;
-  cfg.enumerate_vocab = (EnumerateVocab *) globalVocabBuilder;
-
-  // Load the models, telling the vocabulary builder which model it is enumerating.
-  std::vector<Model *> models;
-  for (std::size_t i = 0; i < lms.size(); ++i) {
-    std::cerr << "Loading LM file: " << lms[i] << std::endl;
-    globalVocabBuilder->SetCurModel(i);
-    models.push_back(new Model(lms[i].c_str(), cfg));
-  }
-
-  //assemble vocabulary vector
-  std::vector<std::string> vocab;
-  std::cerr << "Global Vocab Map has size: " << vmap.size() << std::endl;
-
-  for(std::map<std::string, int*>::iterator iter = vmap.begin(); iter != vmap.end(); ++iter) {
-    vocab.push_back(iter->first);
-  }
-  std::cerr << "Vocab vector has size: " << vocab.size() << std::endl;
-
-  //load context sorted ngrams into vector of vectors
-  std::vector<std::vector<std::string> > corpus;
-
-  std::cerr << "Loading context-sorted ngrams: " << tuning_data << std::endl;
-  std::ifstream infile(tuning_data);
-
-  for (std::string line; std::getline(infile, line); ) {
-    std::vector<std::string> words;
-    std::stringstream stream(line);
-    for (std::string word; stream >> word; ) {
-      words.push_back(word);
-    }
-    corpus.push_back(words);
-  }
-
-  train_params_fast(corpus, vocab, models);
-
-  return 0;
-}
-
-
-

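The deleted trainer above fits interpolation weights by Newton's method on a softmax over per-model log probabilities. A minimal, self-contained sketch of one such Newton step, assuming Eigen3 and toy numbers (illustrative only, not the deleted code):

    // One Newton step for log-linear interpolation weights over a toy
    // vocabulary.  feats(i, m) = ln p_m(word_i | context); ref is the
    // index of the observed word.
    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      Eigen::MatrixXd feats(3, 2);  // 3 vocabulary words, 2 models
      feats << -0.5, -1.2,
               -1.0, -0.3,
               -2.0, -2.5;
      const int ref = 1;            // observed word
      Eigen::VectorXd params = Eigen::VectorXd::Zero(2);

      // Interpolated distribution: softmax over u_i = params . feats_i.
      Eigen::VectorXd u = feats * params;
      Eigen::VectorXd p = (u.array() - u.maxCoeff()).exp().matrix();
      p /= p.sum();

      // Gradient of the negative log-likelihood: E[f] - f_ref.
      Eigen::VectorXd ef = feats.transpose() * p;
      Eigen::VectorXd grad = ef - feats.row(ref).transpose();

      // Hessian: covariance of the features under p.
      Eigen::MatrixXd hess =
          feats.transpose() * p.asDiagonal() * feats - ef * ef.transpose();

      params -= hess.colPivHouseholderQr().solve(grad);  // Newton update
      std::cout << params.transpose() << std::endl;
      return 0;
    }
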
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_derivatives.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_derivatives.cc b/ext/kenlm/lm/interpolate/tune_derivatives.cc
deleted file mode 100644
index c20e637..0000000
--- a/ext/kenlm/lm/interpolate/tune_derivatives.cc
+++ /dev/null
@@ -1,91 +0,0 @@
-#include "lm/interpolate/tune_derivatives.hh"
-
-namespace lm { namespace interpolate {
-
-ComputeDerivative::ComputeDerivative(const util::FixedArray<Instance> &instances, const Matrix &ln_unigrams, WordIndex bos)
-  : instances_(instances), ln_unigrams_(ln_unigrams), bos_(bos) {
-  neg_correct_summed_ = Vector::Zero(ln_unigrams.cols());
-  for (const Instance *i = instances.begin(); i != instances.end(); ++i) {
-    neg_correct_summed_ -= i->ln_correct;
-  }
-}
-
-Accum ComputeDerivative::Iteration(const Vector &weights, Vector &gradient, Matrix &hessian) {
-  gradient = neg_correct_summed_;
-  hessian = Matrix::Zero(weights.rows(), weights.rows());
-
-  // TODO: loop instead to force low-memory evaluation
-  // Compute p_I(x).
-  Vector interp_uni((ln_unigrams_ * weights).array().exp());
-  // Even -inf doesn't work for <s> because weights can be negative.  Manually set it to zero.
-  interp_uni(bos_) = 0.0;
-  Accum Z_epsilon = interp_uni.sum();
-  interp_uni /= Z_epsilon;
-  // unigram_cross(i) = \sum_{all x} p_I(x) ln p_i(x)
-  Vector unigram_cross(ln_unigrams_.transpose() * interp_uni);
-
-  Accum sum_B_I = 0.0;
-  Accum sum_ln_Z_context = 0.0;
-
-  Vector weighted_extensions;
-  Matrix convolve;
-  Vector full_cross;
-
-  for (const Instance *n = instances_.begin(); n != instances_.end(); ++n) {
-    Accum ln_weighted_backoffs = n->ln_backoff.dot(weights);
-    Accum weighted_backoffs = exp(ln_weighted_backoffs);
-
-    // Compute \sum_{x: model does not backoff to unigram} p_I(x)
-    Accum sum_x_p_I = 0.0;
-    for (std::vector<WordIndex>::const_iterator x = n->extension_words.begin(); x != n->extension_words.end(); ++x) {
-      sum_x_p_I += interp_uni(*x);
-    }
-    weighted_extensions = (n->ln_extensions * weights).array().exp();
-    Accum Z_context = Z_epsilon * weighted_backoffs * (1.0 - sum_x_p_I) + weighted_extensions.sum();
-    sum_ln_Z_context += log(Z_context);
-
-    Accum B_I = Z_epsilon / Z_context * weighted_backoffs;
-    sum_B_I += B_I;
-
-    // This is the gradient term for this instance except for -log p_i(w_n | w_1^{n-1}) which was accounted for as part of neg_correct_sum_.
-    // full_cross(i) is \sum_{all x} p_I(x | context) log p_i(x | context)
-    full_cross =
-      // Uncorrected term
-      B_I * (n->ln_backoff + unigram_cross)
-      // Correction term: add correct values
-      + n->ln_extensions.transpose() * weighted_extensions / Z_context
-      // Subtract values that should not have been charged.
-      - sum_x_p_I * B_I * n->ln_backoff;
-    for (std::vector<WordIndex>::const_iterator x = n->extension_words.begin(); x != n->extension_words.end(); ++x) {
-      full_cross.noalias() -= interp_uni(*x) * B_I * ln_unigrams_.row(*x);
-    }
-
-    gradient += full_cross;
-
-    convolve = unigram_cross * n->ln_backoff.transpose();
-    // There's one missing term here, which is independent of context and done at the end.
-    hessian.noalias() +=
-      // First term of Hessian, assuming all models back off to unigram.
-      B_I * (convolve + convolve.transpose() + n->ln_backoff * n->ln_backoff.transpose())
-      // Second term of Hessian, with correct full probabilities.
-      - full_cross * full_cross.transpose();
-
-    // Adjust the first term of the Hessian to account for extension
-    for (std::size_t x = 0; x < n->extension_words.size(); ++x) {
-      WordIndex universal_x = n->extension_words[x];
-      hessian.noalias() +=
-        // Replacement terms.
-        weighted_extensions(x) / Z_context * n->ln_extensions.row(x).transpose() * n->ln_extensions.row(x)
-        // Presumed unigrams.  TODO: individual terms with backoffs pulled out?  Maybe faster?
-        - interp_uni(universal_x) * B_I * (ln_unigrams_.row(universal_x).transpose() + n->ln_backoff) * (ln_unigrams_.row(universal_x) + n->ln_backoff.transpose());
-    }
-  }
-
-  for (Matrix::Index x = 0; x < interp_uni.rows(); ++x) {
-    // \sum_{contexts} B_I(context) \sum_x p_I(x) log p_i(x) log p_j(x)
-    hessian.noalias() += sum_B_I * interp_uni(x) * ln_unigrams_.row(x).transpose() * ln_unigrams_.row(x);
-  }
-  return exp((neg_correct_summed_.dot(weights) + sum_ln_Z_context) / static_cast<double>(instances_.size()));
-}
-
-}} // namespaces

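For reference, the quantities assembled in ComputeDerivative::Iteration correspond to the log-linear interpolation objective; in the code's notation (a sketch of the math, not text from the commit):

\[
p_I(x \mid c) = \frac{\prod_m p_m(x \mid c)^{\lambda_m}}{Z(c)},
\qquad
Z(c) = \sum_x \prod_m p_m(x \mid c)^{\lambda_m}.
\]

The gradient entry for model i sums, over tuning instances n,

\[
\frac{\partial}{\partial \lambda_i} \sum_n -\ln p_I(w_n \mid c_n)
= \sum_n \Big( \sum_x p_I(x \mid c_n)\,\ln p_i(x \mid c_n) - \ln p_i(w_n \mid c_n) \Big),
\]

where the first term is what full_cross accumulates per instance and the second is neg_correct_summed_. The return value exp((neg_correct_summed_ . weights + sum_ln_Z_context) / N) is then the tuning-set perplexity.
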
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_derivatives.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_derivatives.hh b/ext/kenlm/lm/interpolate/tune_derivatives.hh
deleted file mode 100644
index 40c058e..0000000
--- a/ext/kenlm/lm/interpolate/tune_derivatives.hh
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef LM_INTERPOLATE_TUNE_DERIVATIVES_H
-#define LM_INTERPOLATE_TUNE_DERIVATIVES_H
-
-#include "lm/interpolate/tune_instance.hh"
-
-#include <Eigen/Core>
-#include <cmath>
-
-namespace lm { namespace interpolate {
-
-class ComputeDerivative {
-  public:
-    explicit ComputeDerivative(const util::FixedArray<Instance> &instances, const Matrix &ln_unigrams, WordIndex bos);
-
-    Accum Iteration(const Vector &weights, Vector &gradient, Matrix &hessian);
-
-  private:
-    const util::FixedArray<Instance> &instances_;
-    const Matrix &ln_unigrams_;
-
-    const WordIndex bos_;
-
-    // neg_correct_summed_(i) = -\sum_n ln p_i(w_n | w_1^{n-1})
-    Vector neg_correct_summed_;
-};
-
-}} // namespaces
-
-#endif // LM_INTERPOLATE_TUNE_DERIVATIVES_H
-

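A hedged usage sketch for this interface: only ComputeDerivative, Iteration, and the Matrix/Vector/Accum typedefs come from the headers; the driver itself is illustrative.

    #include "lm/interpolate/tune_derivatives.hh"
    #include "util/fixed_array.hh"

    #include <iostream>

    namespace lm { namespace interpolate {

    // Run a few Newton iterations given already-loaded tuning instances.
    void NewtonLoop(const util::FixedArray<Instance> &instances,
                    const Matrix &ln_unigrams, WordIndex bos,
                    Vector &weights) {
      ComputeDerivative derive(instances, ln_unigrams, bos);
      Vector gradient(weights.rows());
      Matrix hessian(weights.rows(), weights.rows());
      for (int i = 0; i < 10; ++i) {
        Accum perplexity = derive.Iteration(weights, gradient, hessian);
        std::cerr << "Iteration " << i << " perplexity " << perplexity << std::endl;
        weights -= hessian.colPivHouseholderQr().solve(gradient);  // Newton step
      }
    }

    }} // namespaces
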
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_derivatives_test.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_derivatives_test.cc b/ext/kenlm/lm/interpolate/tune_derivatives_test.cc
deleted file mode 100644
index 75c0d12..0000000
--- a/ext/kenlm/lm/interpolate/tune_derivatives_test.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include "lm/interpolate/tune_derivatives.hh"
-
-#include "lm/interpolate/tune_instance.hh"
-
-#define BOOST_TEST_MODULE DerivativeTest
-#include <boost/test/unit_test.hpp>
-
-namespace lm { namespace interpolate { namespace {
-
-BOOST_AUTO_TEST_CASE(Small) {
-  // Three vocabulary words plus <s>, two models.
-  Matrix unigrams(4, 2);
-  unigrams <<
-    0.1, 0.6,
-    0.4, 0.3,
-    0.5, 0.1,
-    // <s>
-    1.0, 1.0;
-  unigrams = unigrams.array().log();
-
-  // One instance
-  util::FixedArray<Instance> instances(1);
-  instances.push_back(2);
-  Instance &instance = instances.back();
-
-  instance.ln_backoff << 0.2, 0.4;
-  instance.ln_backoff = instance.ln_backoff.array().log();
-
-  // Sparse cases: model 0 word 2 and model 1 word 1.
-
-  // Assuming that model 1 only matches word 1, this is p_1(1 | context)
-  Accum model_1_word_1 = 1.0 - .6 * .4 - .1 * .4;
-
-  // We'll suppose correct has WordIndex 1, which backs off in model 0, and matches in model 1
-  instance.ln_correct << (0.4 * 0.2), model_1_word_1;
-  instance.ln_correct = instance.ln_correct.array().log();
-
-  Accum model_0_word_2 = 1.0 - .1 * .2 - .4 * .2;
-
-  instance.extension_words.push_back(1);
-  instance.extension_words.push_back(2);
-  instance.ln_extensions.resize(2, 2);
-  instance.ln_extensions <<
-    (0.4 * 0.2), model_1_word_1,
-    model_0_word_2, 0.1 * 0.4;
-  instance.ln_extensions = instance.ln_extensions.array().log();
-
-  ComputeDerivative compute(instances, unigrams, 3);
-  Vector weights(2);
-  weights << 0.9, 1.2;
-
-  Vector gradient(2);
-  Matrix hessian(2,2);
-  compute.Iteration(weights, gradient, hessian);
-
-  // p_I(x | context)
-  Vector p_I(3);
-  p_I <<
-    pow(0.1 * 0.2, 0.9) * pow(0.6 * 0.4, 1.2),
-    pow(0.4 * 0.2, 0.9) * pow(model_1_word_1, 1.2),
-    pow(model_0_word_2, 0.9) * pow(0.1 * 0.4, 1.2);
-  p_I /= p_I.sum();
-
-  Vector expected_gradient = -instance.ln_correct;
-  expected_gradient(0) += p_I(0) * log(0.1 * 0.2);
-  expected_gradient(0) += p_I(1) * log(0.4 * 0.2);
-  expected_gradient(0) += p_I(2) * log(model_0_word_2);
-  BOOST_CHECK_CLOSE(expected_gradient(0), gradient(0), 0.01);
-
-  expected_gradient(1) += p_I(0) * log(0.6 * 0.4);
-  expected_gradient(1) += p_I(1) * log(model_1_word_1);
-  expected_gradient(1) += p_I(2) * log(0.1 * 0.4);
-  BOOST_CHECK_CLOSE(expected_gradient(1), gradient(1), 0.01);
-
-  Matrix expected_hessian(2, 2);
-  expected_hessian(1, 0) =
-    // First term
-    p_I(0) * log(0.1 * 0.2) * log(0.6 * 0.4) +
-    p_I(1) * log(0.4 * 0.2) * log(model_1_word_1) +
-    p_I(2) * log(model_0_word_2) * log(0.1 * 0.4);
-  expected_hessian(1, 0) -=
-    (p_I(0) * log(0.1 * 0.2) + p_I(1) * log(0.4 * 0.2) + p_I(2) * log(model_0_word_2)) *
-    (p_I(0) * log(0.6 * 0.4) + p_I(1) * log(model_1_word_1) + p_I(2) * log(0.1 * 0.4));
-  expected_hessian(0, 1) = expected_hessian(1, 0);
-  BOOST_CHECK_CLOSE(expected_hessian(1, 0), hessian(1, 0), 0.01);
-  BOOST_CHECK_CLOSE(expected_hessian(0, 1), hessian(0, 1), 0.01);
-}
-
-}}} // namespaces

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance.cc b/ext/kenlm/lm/interpolate/tune_instance.cc
deleted file mode 100644
index f1c9924..0000000
--- a/ext/kenlm/lm/interpolate/tune_instance.cc
+++ /dev/null
@@ -1,354 +0,0 @@
-#include "lm/interpolate/tune_instance.hh"
-
-#include "lm/common/model_buffer.hh"
-#include "lm/common/ngram_stream.hh"
-#include "lm/common/renumber.hh"
-#include "lm/enumerate_vocab.hh"
-#include "lm/interpolate/merge_vocab.hh"
-#include "lm/interpolate/universal_vocab.hh"
-#include "lm/lm_exception.hh"
-#include "util/file_piece.hh"
-#include "util/murmur_hash.hh"
-#include "util/stream/chain.hh"
-#include "util/tokenize_piece.hh"
-
-#include <boost/unordered_map.hpp>
-
-#include <cmath>
-#include <limits>
-#include <vector>
-
-namespace lm { namespace interpolate {
-
-// An extension without backoff weights applied yet.
-#pragma pack(push)
-#pragma pack(1)
-struct InitialExtension {
-  Extension ext;
-  // Order from which it came.
-  uint8_t order;
-};
-#pragma pack(pop)
-
-// Intended use
-// For each model:
-//   stream through orders jointly in suffix order:
-//     Call MatchedBackoff for full matches.
-//     Call Exit when the context matches.
-//   Call FinishModel with the unigram probability of the correct word, get full
-//   probability in return.
-// Use Backoffs to adjust records that were written to the stream.
-class InstanceMatch {
-  public:
-    InstanceMatch(ModelIndex models, uint8_t max_order, const WordIndex correct)
-      : seen_(std::numeric_limits<WordIndex>::max()),
-        backoffs_(Matrix::Zero(models, max_order)),
-        correct_(correct), correct_from_(1), correct_ln_prob_(std::numeric_limits<float>::quiet_NaN()) {}
-
-    void MatchedBackoff(ModelIndex model, uint8_t order, float ln_backoff) {
-      backoffs_(model, order - 1) = ln_backoff;
-    }
-
-    // We only want the highest-order matches, which are the first to be exited for a given word.
-    void Exit(const InitialExtension &from, util::stream::Stream &out) {
-      if (from.ext.word == seen_) return;
-      seen_ = from.ext.word;
-      *static_cast<InitialExtension*>(out.Get()) = from;
-      ++out;
-      if (UTIL_UNLIKELY(correct_ == from.ext.word)) {
-        correct_from_ = from.order;
-        correct_ln_prob_ = from.ext.ln_prob;
-      }
-    }
-
-    WordIndex Correct() const { return correct_; }
-
-    // Call this after each model has been passed through.  Returns the full
-    // ln probability of the correct word under that model.
-    float FinishModel(ModelIndex model, float correct_ln_unigram) {
-      seen_ = std::numeric_limits<WordIndex>::max();
-      // Turn backoffs into multiplied values (added in log space).
-      // So backoffs_(model, order - 1) is the penalty for matching order.
-      float accum = 0.0;
-      for (int order = backoffs_.cols() - 1; order >= 0; --order) {
-        accum += backoffs_(model, order);
-        backoffs_(model, order) = accum;
-      }
-      if (correct_from_ == 1) {
-        correct_ln_prob_ = correct_ln_unigram;
-      }
-      if (correct_from_ - 1 < backoffs_.cols()) {
-        correct_ln_prob_ += backoffs_(model, correct_from_ - 1);
-      }
-      correct_from_ = 1;
-      return correct_ln_prob_;
-    }
-
-    const Matrix &Backoffs() const {
-      return backoffs_;
-    }
-
-  private:
-    // What's the last word we've seen?  Used to act only on exiting the longest match.
-    WordIndex seen_;
-
-    Matrix backoffs_;
-
-    const WordIndex correct_;
-
-    // These only apply to the most recent model.
-    uint8_t correct_from_;
-
-    float correct_ln_prob_;
-};
-
-namespace {
-
-// Forward information to multiple instances of a context.
-class DispatchContext {
-  public:
-    void Register(InstanceMatch &context) {
-      registered_.push_back(&context);
-    }
-
-    void MatchedBackoff(ModelIndex model, uint8_t order, float ln_backoff) {
-      for (std::vector<InstanceMatch*>::iterator i = registered_.begin(); i != registered_.end(); ++i)
-        (*i)->MatchedBackoff(model, order, ln_backoff);
-    }
-
-    void Exit(const InitialExtension &from, util::stream::Stream &out) {
-      for (std::vector<InstanceMatch*>::iterator i = registered_.begin(); i != registered_.end(); ++i) {
-        (*i)->Exit(from, out);
-      }
-    }
-
-  private:
-    std::vector<InstanceMatch*> registered_;
-};
-
-// Map from n-gram hash to contexts in the tuning data.
-typedef boost::unordered_map<uint64_t, DispatchContext> ContextMap;
-
-class ApplyBackoffs {
-  public:
-    explicit ApplyBackoffs(const InstanceMatch *backoffs) : backoffs_(backoffs) {}
-
-    void Run(const util::stream::ChainPosition &position) {
-      for (util::stream::Stream stream(position); stream; ++stream) {
-        InitialExtension &ini = *reinterpret_cast<InitialExtension*>(stream.Get());
-        // Charge the accumulated backoff penalty for the order at which this extension matched.
-        ini.ext.ln_prob += backoffs_[ini.ext.instance].Backoffs()(ini.ext.model, ini.order - 1);
-      }
-    }
-
-  private:
-    const InstanceMatch *backoffs_;
-};
-
-void Instances::ReadExtensions(util::stream::Chain &on) {
-  if (extensions_first_.get()) {
-    // Lazily sort and save a sorted copy to disk.  TODO: cut down on record size by stripping out order information?
-    extensions_first_->Output(on);
-    extensions_first_.reset();
-    // TODO: apply backoff data!!!!
-
-    extensions_subsequent_.reset(new util::stream::FileBuffer(util::MakeTemp(sorting_config_.temp_prefix)));
-    on >> extensions_subsequent_->Sink();
-  } else {
-    on >> extensions_subsequent_->Source();
-  }
-}
-
-class UnigramLoader {
-  public:
-    UnigramLoader(ContextMap &contexts_for_backoffs, Matrix &ln_probs, std::size_t model_number)
-      : map_(contexts_for_backoffs),
-        prob_(ln_probs.col(model_number)),
-        model_(model_number) {}
-
-    void Run(const util::stream::ChainPosition &position) {
-      // TODO handle the case of a unigram model?
-      NGramStream<ProbBackoff> input(position);
-      assert(input);
-      Accum unk = input->Value().prob * M_LN10;
-      WordIndex previous = 0;
-      for (; input; ++input) {
-        WordIndex word = *input->begin();
-        prob_.segment(previous, word - previous) = Vector::Constant(word - previous, unk);
-        prob_(word) = input->Value().prob * M_LN10;
-        ContextMap::iterator i = map_.find(util::MurmurHashNative(input->begin(), sizeof(WordIndex)));
-        if (i != map_.end()) {
-          i->second.MatchedBackoff(model_, 1, input->Value().backoff * M_LN10);
-        }
-        previous = word + 1;
-      }
-      prob_.segment(previous, prob_.rows() - previous) = Vector::Constant(prob_.rows() - previous, unk);
-    }
-
-  private:
-    ContextMap &map_;
-    Matrix::ColXpr prob_;
-    std::size_t model_;
-};
-
-class MiddleLoader {
-  public:
-    MiddleLoader(ContextMap &map, ModelIndex model)
-      : map_(map), model_(model) {}
-
-    void Run(const util::stream::ChainPosition &position) {
-      NGramStream<ProbBackoff> input(position);
-      const std::size_t full_size = (uint8_t*)input->end() - (uint8_t*)input->begin();
-      const std::size_t context_size = full_size - sizeof(WordIndex);
-      ContextMap::iterator i;
-      for (; input; ++input) {
-        i = map_.find(util::MurmurHashNative(input->begin(), full_size));
-        if (i != map_.end()) {
-          i->second.MatchedBackoff(model_, input->Order(), input->Value().backoff * M_LN10);
-        }
-        i = map_.find(util::MurmurHashNative(input->begin(), context_size));
-        if (i != map_.end()) {
-          i->second.MatchedContext(input->Order(), *(input->end() - 1), input->Value().prob * M_LN10);
-        }
-      }
-    }
-
-  private:
-    ContextMap &map_;
-    const ModelIndex model_;
-};
-
-class HighestLoader {
-  public:
-    HighestLoader(ContextMap &map, uint8_t order)
-      : map_(map), order_(order) {}
-
-    void Run(const util::stream::ChainPosition &position) {
-      ContextMap::iterator i;
-      const std::size_t context_size = sizeof(WordIndex) * (order_ - 1);
-      for (ProxyStream<NGram<float> > input(position, NGram<float>(NULL, order_)); input; ++input) {
-        i = map_.find(util::MurmurHashNative(input->begin(), context_size));
-        if (i != map_.end()) {
-          i->second.MatchedContext(order_, *(input->end() - 1), input->Value() * M_LN10);
-        }
-      }
-    }
-
-  private:
-    ContextMap &map_;
-    const uint8_t order_;
-};
-
-class IdentifyTuning : public EnumerateVocab {
-  public:
-    IdentifyTuning(int tuning_file, std::vector<WordIndex> &out) : indices_(out) {
-      indices_.clear();
-      StringPiece line;
-      std::size_t counter = 0;
-      std::vector<std::size_t> &eos = words_[util::MurmurHashNative("</s>", 4)];
-      for (util::FilePiece f(tuning_file); f.ReadLineOrEOF(line);) {
-        for (util::TokenIter<util::BoolCharacter, true> word(line, util::kSpaces); word; ++word) {
-          UTIL_THROW_IF(*word == "<s>" || *word == "</s>", FormatLoadException, "Illegal word in tuning data: " << *word);
-          words_[util::MurmurHashNative(word->data(), word->size())].push_back(counter++);
-        }
-        eos.push_back(counter++);
-      }
-      // Also get <s>
-      indices_.resize(counter + 1);
-      words_[util::MurmurHashNative("<s>", 3)].push_back(indices_.size() - 1);
-    }
-
-    void Add(WordIndex id, const StringPiece &str) {
-      boost::unordered_map<uint64_t, std::vector<std::size_t> >::iterator i = words_.find(util::MurmurHashNative(str.data(), str.size()));
-      if (i != words_.end()) {
-        for (std::vector<std::size_t>::iterator j = i->second.begin(); j != i->second.end(); ++j) {
-          indices_[*j] = id;
-        }
-      }
-    }
-
-    WordIndex FinishGetBOS() {
-      WordIndex ret = indices_.back();
-      indices_.pop_back();
-      return ret;
-    }
-
-  private:
-    std::vector<WordIndex> &indices_;
-
-    boost::unordered_map<uint64_t, std::vector<std::size_t> > words_;
-};
-
-} // namespace
-
-Instance::Instance(std::size_t num_models) : ln_backoff(num_models), ln_correct(num_models), ln_extensions(0, num_models) {}
-
-WordIndex LoadInstances(int tuning_file, const std::vector<StringPiece> &model_names, util::FixedArray<Instance> &instances, Matrix &ln_unigrams) {
-  util::FixedArray<ModelBuffer> models(model_names.size());
-  std::vector<WordIndex> vocab_sizes;
-  vocab_sizes.reserve(model_names.size());
-  util::FixedArray<util::scoped_fd> vocab_files(model_names.size());
-  std::size_t max_order = 0;
-  for (std::vector<StringPiece>::const_iterator i = model_names.begin(); i != model_names.end(); ++i) {
-    models.push_back(*i);
-    vocab_sizes.push_back(models.back().Counts()[0]);
-    vocab_files.push_back(models.back().StealVocabFile());
-    max_order = std::max(max_order, models.back().Order());
-  }
-  UniversalVocab vocab(vocab_sizes);
-  std::vector<WordIndex> tuning_words;
-  WordIndex bos;
-  WordIndex combined_vocab_size;
-  {
-    IdentifyTuning identify(tuning_file, tuning_words);
-    combined_vocab_size = MergeVocab(vocab_files, vocab, identify);
-    bos = identify.FinishGetBOS();
-  }
-
-  instances.Init(tuning_words.size());
-  util::FixedArray<InstanceMatch> builders(tuning_words.size());
-  std::vector<WordIndex> context;
-  context.push_back(bos);
-
-  // Populate the map from contexts to instance builders.
-  ContextMap cmap;
-  const WordIndex eos = tuning_words.back();
-  for (std::size_t i = 0; i < tuning_words.size(); ++i) {
-    instances.push_back(model_names.size());
-    builders.push_back(model_names.size(), max_order, tuning_words[i]);
-    for (std::size_t j = 0; j < context.size(); ++j) {
-      cmap[util::MurmurHashNative(&context[j], sizeof(WordIndex) * (context.size() - j))].Register(builders.back());
-    }
-    // Prepare for next word.
-    if (tuning_words[i] == eos) {
-      context.clear();
-      context.push_back(bos);
-    } else {
-      if (context.size() == max_order) {
-        context.erase(context.begin());
-      }
-      context.push_back(tuning_words[i]);
-    }
-  }
-
-  ln_unigrams.resize(combined_vocab_size, models.size());
-
-  // Scan through input files.  Sadly not parallel due to an underlying hash table.
-  for (std::size_t m = 0; m < models.size(); ++m) {
-    for (std::size_t order = 1; order <= models[m].Order(); ++order) {
-      util::stream::Chain chain(util::stream::ChainConfig(sizeof(ProbBackoff) + order * sizeof(WordIndex), 2, 64 * 1048576));
-      models[m].Source(order - 1, chain);
-      chain >> Renumber(vocab.Mapping(m), order);
-      if (order == 1) {
-        chain >> UnigramLoader(cmap, ln_unigrams, m);
-      } else if (order < models[m].Order()) {
-        chain >> MiddleLoader(cmap, m);
-      } else {
-        chain >> HighestLoader(cmap, order);
-      }
-    }
-    for (std::size_t instance = 0; instance < tuning_words.size(); ++instance) {
-      builders[instance].Dump(m, ln_unigrams, instances[instance]);
-    }
-    ln_unigrams(bos, m) = -99; // Does not matter as long as it does not produce nans since tune_derivatives sets this to zero.
-  }
-  return bos;
-}
-
-}} // namespaces

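The core trick in the deleted loader is registering every suffix of each instance's context in a hash map, so one sequential scan of a model's n-grams can notify all instances whose context it touches. A stripped-down sketch of that registration (MurmurHashNative is the KenLM utility used above; the container and types here are illustrative):

    #include "util/murmur_hash.hh"

    #include <boost/unordered_map.hpp>
    #include <cstddef>
    #include <stdint.h>
    #include <vector>

    typedef uint32_t WordIndex;  // mirrors lm::WordIndex

    // Register instance under the hash of every suffix context[j..end).
    void RegisterSuffixes(
        const std::vector<WordIndex> &context, std::size_t instance,
        boost::unordered_map<uint64_t, std::vector<std::size_t> > &dispatch) {
      for (std::size_t j = 0; j < context.size(); ++j) {
        uint64_t key = util::MurmurHashNative(
            &context[j], sizeof(WordIndex) * (context.size() - j));
        dispatch[key].push_back(instance);
      }
    }
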
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance.hh b/ext/kenlm/lm/interpolate/tune_instance.hh
deleted file mode 100644
index c11eec9..0000000
--- a/ext/kenlm/lm/interpolate/tune_instance.hh
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef LM_INTERPOLATE_TUNE_INSTANCE_H
-#define LM_INTERPOLATE_TUNE_INSTANCE_H
-
-#include "lm/interpolate/tune_matrix.hh"
-#include "lm/word_index.hh"
-#include "util/scoped.hh"
-#include "util/stream/config.hh"
-#include "util/string_piece.hh"
-
-#include <boost/optional.hpp>
-
-#include <vector>
-
-namespace util { namespace stream {
-template <class S, class T> class Sort;
-class Chain;
-class FileBuffer;
-}} // namespaces
-
-namespace lm { namespace interpolate {
-
-typedef uint32_t InstanceIndex;
-typedef uint32_t ModelIndex;
-
-struct Extension {
-  // Which tuning instance does this belong to?
-  InstanceIndex instance;
-  WordIndex word;
-  ModelIndex model;
-  // ln p_{model} (word | context(instance))
-  float ln_prob;
-
-  bool operator<(const Extension &other) const {
-    if (instance != other.instance)
-      return instance < other.instance;
-    if (word != other.word)
-      return word < other.word;
-    if (model != other.model)
-      return model < other.model;
-    return false;
-  }
-};
-
-class Instances {
-  public:
-    // Row-major so Backoffs() returns a contiguous per-instance row.
-    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> BackoffMatrix;
-
-    Instances(int tune_file, const std::vector<StringPiece> &model_names);
-
-    BackoffMatrix::ConstRowXpr Backoffs(InstanceIndex instance) const {
-      return ln_backoffs_.row(instance);
-    }
-
-    const Vector &CorrectGradientTerm() const { return neg_ln_correct_sum_; }
-
-    const Matrix &LNUnigrams() const { return ln_unigrams_; }
-
-    void ReadExtensions(util::stream::Chain &to);
-
-  private:
-    // ln_backoffs_(instance, model) is the backoff all the way to unigrams.
-    BackoffMatrix ln_backoffs_;
-
-    // neg_correct_sum_(model) = -\sum_{instances} ln p_{model}(correct(instance) | context(instance)).
-    // This appears as a term in the gradient.
-    Vector neg_ln_correct_sum_;
-
-    // unigrams_(word, model) = ln p_{model}(word).
-    Matrix ln_unigrams_;
-
-    struct ExtensionCompare {
-      bool operator()(const void *f, const void *s) const {
-        return *reinterpret_cast<const Extension*>(f) < *reinterpret_cast<const Extension*>(s);
-      }
-    };
-
-    // This is the source of data for the first iteration.
-    util::scoped_ptr<util::stream::Sort<ExtensionCompare> > extensions_first_;
-
-    // Source of data for subsequent iterations.  This contains already-sorted data.
-    util::scoped_ptr<util::stream::FileBuffer> extensions_subsequent_;
-
-    const util::stream::SortConfig sorting_config_;
-};
-
-}} // namespaces
-#endif // LM_INTERPOLATE_TUNE_INSTANCE_H

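The operator< above orders extension records by instance, then word, then model; after sorting, every model's probability for the same (instance, word) pair is adjacent, which is the layout a single merging pass needs. A self-contained illustration of that invariant, with Extension mirrored locally:

    #include <algorithm>
    #include <stdint.h>
    #include <vector>

    // Local mirror of the header's Extension, for illustration only.
    struct Extension {
      uint32_t instance;
      uint32_t word;
      uint32_t model;
      float ln_prob;
      bool operator<(const Extension &other) const {
        if (instance != other.instance) return instance < other.instance;
        if (word != other.word) return word < other.word;
        return model < other.model;
      }
    };

    int main() {
      std::vector<Extension> records;
      // ... fill records from the extension stream ...
      std::sort(records.begin(), records.end());
      // Records are now grouped by instance, then word, with each model's
      // ln prob adjacent -- the layout the tuning iteration consumes.
      return 0;
    }
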
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/generate.sh
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/generate.sh b/ext/kenlm/lm/interpolate/tune_instance_data/generate.sh
deleted file mode 100755
index d725572..0000000
--- a/ext/kenlm/lm/interpolate/tune_instance_data/generate.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-../../../bin/lmplz --discount_fallback -o 3 -S 100M --intermediate toy0 --arpa toy0.arpa <<EOF
-a a b a
-b a a b
-EOF
-../../../bin/lmplz --discount_fallback -o 3 -S 100M --intermediate toy1 --arpa toy1.arpa <<EOF
-a a b b b b b b b
-c
-EOF

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.1
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.1 b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.1
deleted file mode 100644
index 1b66c51..0000000
Binary files a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.1 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.2
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.2 b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.2
deleted file mode 100644
index d735b1c..0000000
Binary files a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.2 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.3
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.3 b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.3
deleted file mode 100644
index 2d97aa3..0000000
Binary files a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.3 and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate
deleted file mode 100644
index 8513475..0000000
--- a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.kenlm_intermediate
+++ /dev/null
@@ -1,3 +0,0 @@
-KenLM intermediate binary file
-Counts 5 7 7
-Payload pb

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.vocab
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.vocab b/ext/kenlm/lm/interpolate/tune_instance_data/toy0.vocab
deleted file mode 100644
index 520c0f9..0000000
Binary files a/ext/kenlm/lm/interpolate/tune_instance_data/toy0.vocab and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/interpolate/tune_instance_data/toy1.1
----------------------------------------------------------------------
diff --git a/ext/kenlm/lm/interpolate/tune_instance_data/toy1.1 b/ext/kenlm/lm/interpolate/tune_instance_data/toy1.1
deleted file mode 100644
index a50cec6..0000000
Binary files a/ext/kenlm/lm/interpolate/tune_instance_data/toy1.1 and /dev/null differ