You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/19 21:33:50 UTC

[02/51] [partial] incubator-joshua git commit: Converted KenLM into a submodule

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/strtod.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/strtod.cc b/ext/kenlm/util/double-conversion/strtod.cc
deleted file mode 100644
index 55b4daa..0000000
--- a/ext/kenlm/util/double-conversion/strtod.cc
+++ /dev/null
@@ -1,558 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <cstdarg>
-#include <climits>
-
-#include "strtod.h"
-#include "bignum.h"
-#include "cached-powers.h"
-#include "ieee.h"
-
-namespace double_conversion {
-
-// 2^53 = 9007199254740992.
-// Any integer with at most 15 decimal digits will hence fit into a double
-// (which has a 53bit significand) without loss of precision.
-static const int kMaxExactDoubleIntegerDecimalDigits = 15;
-// 2^64 = 18446744073709551616 > 10^19
-static const int kMaxUint64DecimalDigits = 19;
-
-// Max double: 1.7976931348623157 x 10^308
-// Min non-zero double: 4.9406564584124654 x 10^-324
-// Any x >= 10^309 is interpreted as +infinity.
-// Any x <= 10^-324 is interpreted as 0.
-// Note that 2.5e-324 (despite being smaller than the min double) will be read
-// as non-zero (equal to the min non-zero double).
-static const int kMaxDecimalPower = 309;
-static const int kMinDecimalPower = -324;
-
-// 2^64 - 1 = 18446744073709551615
-static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF);
-
-
-static const double exact_powers_of_ten[] = {
-  1.0,  // 10^0
-  10.0,
-  100.0,
-  1000.0,
-  10000.0,
-  100000.0,
-  1000000.0,
-  10000000.0,
-  100000000.0,
-  1000000000.0,
-  10000000000.0,  // 10^10
-  100000000000.0,
-  1000000000000.0,
-  10000000000000.0,
-  100000000000000.0,
-  1000000000000000.0,
-  10000000000000000.0,
-  100000000000000000.0,
-  1000000000000000000.0,
-  10000000000000000000.0,
-  100000000000000000000.0,  // 10^20
-  1000000000000000000000.0,
-  // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22
-  10000000000000000000000.0
-};
-static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten);
-
-// Maximum number of significant digits in the decimal representation.
-// In fact the value is 772 (see conversions.cc), but to give us some margin
-// we round up to 780.
-static const int kMaxSignificantDecimalDigits = 780;
-
-static Vector<const char> TrimLeadingZeros(Vector<const char> buffer) {
-  for (int i = 0; i < buffer.length(); i++) {
-    if (buffer[i] != '0') {
-      return buffer.SubVector(i, buffer.length());  // Drop the '0's before i.
-    }
-  }
-  return Vector<const char>(buffer.start(), 0);  // Only '0's (or empty input).
-}
-
-
-static Vector<const char> TrimTrailingZeros(Vector<const char> buffer) {
-  for (int i = buffer.length() - 1; i >= 0; --i) {
-    if (buffer[i] != '0') {
-      return buffer.SubVector(0, i + 1);  // Drop the '0's after i.
-    }
-  }
-  return Vector<const char>(buffer.start(), 0);  // Only '0's (or empty input).
-}
-
-
-static void CutToMaxSignificantDigits(Vector<const char> buffer,
-                                       int exponent,
-                                       char* significant_buffer,
-                                       int* significant_exponent) {
-  for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) {
-    significant_buffer[i] = buffer[i];  // Copy the leading digits unchanged.
-  }
-  // The input buffer has been trimmed. Therefore the last digit must be
-  // different from '0'.
-  ASSERT(buffer[buffer.length() - 1] != '0');
-  // The dropped tail is therefore non-zero. Record that by setting the last
-  // kept digit to '1'; this is sufficient to guarantee correct rounding.
-  significant_buffer[kMaxSignificantDecimalDigits - 1] = '1';
-  *significant_exponent =
-      exponent + (buffer.length() - kMaxSignificantDecimalDigits);
-}
-
-
-// Trims leading and trailing zeros from the buffer and cuts the result to at
-// most kMaxSignificantDecimalDigits digits.
-// If possible the input buffer is reused, but if the digits need to be
-// modified (due to cutting), they are copied into buffer_copy_space.
-static void TrimAndCut(Vector<const char> buffer, int exponent,
-                       char* buffer_copy_space, int space_size,
-                       Vector<const char>* trimmed, int* updated_exponent) {
-  Vector<const char> left_trimmed = TrimLeadingZeros(buffer);
-  Vector<const char> right_trimmed = TrimTrailingZeros(left_trimmed);
-  exponent += left_trimmed.length() - right_trimmed.length();  // Removed trailing zeros move into the exponent.
-  if (right_trimmed.length() > kMaxSignificantDecimalDigits) {
-    ASSERT(space_size >= kMaxSignificantDecimalDigits);
-    CutToMaxSignificantDigits(right_trimmed, exponent,
-                              buffer_copy_space, updated_exponent);
-    *trimmed = Vector<const char>(buffer_copy_space,
-                                 kMaxSignificantDecimalDigits);
-  } else {
-    *trimmed = right_trimmed;
-    *updated_exponent = exponent;
-  }
-}
-
-
-// Reads digits from the buffer and converts them to a uint64.
-// Reads in as many digits as fit into a uint64.
-// When the string starts with "1844674407370955161" no further digit is read.
-// Since 2^64 = 18446744073709551616 it would still be possible to read
-// another digit if it were at most 5, but this would complicate the code.
-static uint64_t ReadUint64(Vector<const char> buffer,
-                           int* number_of_read_digits) {
-  uint64_t result = 0;
-  int i = 0;
-  while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) {
-    int digit = buffer[i++] - '0';
-    ASSERT(0 <= digit && digit <= 9);
-    result = 10 * result + digit;
-  }
-  *number_of_read_digits = i;
-  return result;
-}
-
-
-// Reads a DiyFp from the buffer.
-// The returned DiyFp is not necessarily normalized.
-// If remaining_decimals is zero then the returned DiyFp is accurate.
-// Otherwise it has been rounded and has error of at most 1/2 ulp.
-static void ReadDiyFp(Vector<const char> buffer,
-                      DiyFp* result,
-                      int* remaining_decimals) {
-  int read_digits;
-  uint64_t significand = ReadUint64(buffer, &read_digits);
-  if (buffer.length() == read_digits) {
-    *result = DiyFp(significand, 0);
-    *remaining_decimals = 0;
-  } else {
-    // Round the significand up if the first unread digit is at least '5'.
-    if (buffer[read_digits] >= '5') {
-      significand++;
-    }
-    // The binary exponent is zero: the significand is a plain integer.
-    int exponent = 0;
-    *result = DiyFp(significand, exponent);
-    *remaining_decimals = buffer.length() - read_digits;
-  }
-}
-
-
-static bool DoubleStrtod(Vector<const char> trimmed,
-                         int exponent,
-                         double* result) {  // Returns true iff *result was set.
-#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS)
-  // On x86 the floating-point stack can be 64 or 80 bits wide. If it is
-  // 80 bits wide (as is the case on Linux) then double-rounding occurs and the
-  // result is not accurate.
-  // We know that Windows32 uses 64 bits and is therefore accurate.
-  // Note that the ARM simulator is compiled for 32bits. It therefore exhibits
-  // the same problem.
-  return false;  // Fast path disabled; *result is left untouched.
-#endif
-  if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) {
-    int read_digits;
-    // The trimmed input fits into a double.
-    // If the 10^exponent (resp. 10^-exponent) fits into a double too then we
-    // can compute the result-double simply by multiplying (resp. dividing) the
-    // two numbers.
-    // This is possible because IEEE guarantees that floating-point operations
-    // return the best possible approximation.
-    if (exponent < 0 && -exponent < kExactPowersOfTenSize) {
-      // 10^-exponent fits into a double.
-      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
-      ASSERT(read_digits == trimmed.length());
-      *result /= exact_powers_of_ten[-exponent];
-      return true;
-    }
-    if (0 <= exponent && exponent < kExactPowersOfTenSize) {
-      // 10^exponent fits into a double.
-      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
-      ASSERT(read_digits == trimmed.length());
-      *result *= exact_powers_of_ten[exponent];
-      return true;
-    }
-    int remaining_digits =
-        kMaxExactDoubleIntegerDecimalDigits - trimmed.length();
-    if ((0 <= exponent) &&
-        (exponent - remaining_digits < kExactPowersOfTenSize)) {
-      // The trimmed string was short and we can multiply it with
-      // 10^remaining_digits. As a result the remaining exponent now fits
-      // into a double too.
-      *result = static_cast<double>(ReadUint64(trimmed, &read_digits));
-      ASSERT(read_digits == trimmed.length());
-      *result *= exact_powers_of_ten[remaining_digits];
-      *result *= exact_powers_of_ten[exponent - remaining_digits];
-      return true;
-    }
-  }
-  return false;
-}
-
-
-// Returns 10^exponent as an exact DiyFp.
-// The given exponent must be in the range [1, kDecimalExponentDistance).
-static DiyFp AdjustmentPowerOfTen(int exponent) {
-  ASSERT(0 < exponent);
-  ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance);
-  // Simply hardcode the remaining powers for the given decimal exponent
-  // distance.
-  ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8);
-  switch (exponent) {
-    case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60);
-    case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57);
-    case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54);
-    case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50);
-    case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47);
-    case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44);
-    case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40);
-    default:
-      UNREACHABLE();
-      return DiyFp(0, 0);
-  }
-}
-
-
-// If the function returns true then the result is the correct double.
-// Otherwise it is either the correct double or the double that is just below
-// the correct double.
-static bool DiyFpStrtod(Vector<const char> buffer,
-                        int exponent,
-                        double* result) {
-  DiyFp input;
-  int remaining_decimals;
-  ReadDiyFp(buffer, &input, &remaining_decimals);
-  // Since we may have dropped some digits the input is not accurate.
-  // If remaining_decimals is different from 0 then the error is at most
-  // .5 ulp (unit in the last place).
-  // We don't want to deal with fractions and therefore keep a common
-  // denominator.
-  const int kDenominatorLog = 3;
-  const int kDenominator = 1 << kDenominatorLog;
-  // Move the remaining decimals into the exponent.
-  exponent += remaining_decimals;
-  int error = (remaining_decimals == 0 ? 0 : kDenominator / 2);
-
-  int old_e = input.e();
-  input.Normalize();
-  error <<= old_e - input.e();
-
-  ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent);
-  if (exponent < PowersOfTenCache::kMinDecimalExponent) {
-    *result = 0.0;
-    return true;
-  }
-  DiyFp cached_power;
-  int cached_decimal_exponent;
-  PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent,
-                                                     &cached_power,
-                                                     &cached_decimal_exponent);
-
-  if (cached_decimal_exponent != exponent) {
-    int adjustment_exponent = exponent - cached_decimal_exponent;
-    DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent);
-    input.Multiply(adjustment_power);
-    if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) {
-      // The product of input with the adjustment power fits into a 64 bit
-      // integer.
-      ASSERT(DiyFp::kSignificandSize == 64);
-    } else {
-      // The adjustment power is exact. There is hence only an error of 0.5.
-      error += kDenominator / 2;
-    }
-  }
-
-  input.Multiply(cached_power);
-  // The error introduced by a multiplication of a*b equals
-  //   error_a + error_b + error_a*error_b/2^64 + 0.5
-  // Substituting a with 'input' and b with 'cached_power' we have
-  //   error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
-  //   error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64
-  int error_b = kDenominator / 2;
-  int error_ab = (error == 0 ? 0 : 1);  // We round up to 1.
-  int fixed_error = kDenominator / 2;
-  error += error_b + error_ab + fixed_error;
-
-  old_e = input.e();
-  input.Normalize();
-  error <<= old_e - input.e();
-
-  // See if the double's significand changes if we add/subtract the error.
-  int order_of_magnitude = DiyFp::kSignificandSize + input.e();
-  int effective_significand_size =
-      Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude);
-  int precision_digits_count =
-      DiyFp::kSignificandSize - effective_significand_size;
-  if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) {
-    // This can only happen for very small denormals. In this case the
-    // half-way multiplied by the denominator exceeds the range of a uint64.
-    // Simply shift everything to the right.
-    int shift_amount = (precision_digits_count + kDenominatorLog) -
-        DiyFp::kSignificandSize + 1;
-    input.set_f(input.f() >> shift_amount);
-    input.set_e(input.e() + shift_amount);
-    // We add 1 for the lost precision of error, and kDenominator for
-    // the lost precision of input.f().
-    error = (error >> shift_amount) + 1 + kDenominator;
-    precision_digits_count -= shift_amount;
-  }
-  // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too.
-  ASSERT(DiyFp::kSignificandSize == 64);
-  ASSERT(precision_digits_count < 64);
-  uint64_t one64 = 1;
-  uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1;
-  uint64_t precision_bits = input.f() & precision_bits_mask;
-  uint64_t half_way = one64 << (precision_digits_count - 1);
-  precision_bits *= kDenominator;
-  half_way *= kDenominator;
-  DiyFp rounded_input(input.f() >> precision_digits_count,
-                      input.e() + precision_digits_count);
-  if (precision_bits >= half_way + error) {
-    rounded_input.set_f(rounded_input.f() + 1);
-  }
-  // If the precision_bits are too close to the half-way case then we are
-  // too inaccurate and round down. In this case we return false so that we
-  // can fall back to a more precise algorithm.
-
-  *result = Double(rounded_input).value();
-  if (half_way - error < precision_bits && precision_bits < half_way + error) {
-    // Too imprecise. The caller will have to fall back to a slower version.
-    // However the returned number is guaranteed to be either the correct
-    // double, or the next-lower double.
-    return false;
-  } else {
-    return true;
-  }
-}
-
-
-// Returns
-//   - -1 if buffer*10^exponent < diy_fp.
-//   -  0 if buffer*10^exponent == diy_fp.
-//   - +1 if buffer*10^exponent > diy_fp.
-// Preconditions:
-//   buffer.length() + exponent <= kMaxDecimalPower + 1
-//   buffer.length() + exponent > kMinDecimalPower
-//   buffer.length() <= kMaxSignificantDecimalDigits
-static int CompareBufferWithDiyFp(Vector<const char> buffer,
-                                  int exponent,
-                                  DiyFp diy_fp) {
-  ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1);
-  ASSERT(buffer.length() + exponent > kMinDecimalPower);
-  ASSERT(buffer.length() <= kMaxSignificantDecimalDigits);
-  // Make sure that the Bignum will be able to hold all our numbers.
-  // Our Bignum implementation has a separate field for exponents. Shifts will
-  // consume at most one bigit (< 64 bits).
-  // log2(10) == 3.3219...
-  ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits);
-  Bignum buffer_bignum;
-  Bignum diy_fp_bignum;
-  buffer_bignum.AssignDecimalString(buffer);
-  diy_fp_bignum.AssignUInt64(diy_fp.f());
-  if (exponent >= 0) {
-    buffer_bignum.MultiplyByPowerOfTen(exponent);
-  } else {
-    diy_fp_bignum.MultiplyByPowerOfTen(-exponent);
-  }
-  if (diy_fp.e() > 0) {
-    diy_fp_bignum.ShiftLeft(diy_fp.e());
-  } else {
-    buffer_bignum.ShiftLeft(-diy_fp.e());
-  }
-  return Bignum::Compare(buffer_bignum, diy_fp_bignum);
-}
-
-
-// Returns true if the guess is the correct double.
-// Returns false when the guess is either correct or the next-lower double.
-static bool ComputeGuess(Vector<const char> trimmed, int exponent,
-                         double* guess) {
-  if (trimmed.length() == 0) {
-    *guess = 0.0;  // No significant digits left: the value is zero.
-    return true;
-  }
-  if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) {
-    *guess = Double::Infinity();  // At least 10^kMaxDecimalPower: overflow.
-    return true;
-  }
-  if (exponent + trimmed.length() <= kMinDecimalPower) {
-    *guess = 0.0;  // Below 10^kMinDecimalPower: underflow to zero.
-    return true;
-  }
-
-  if (DoubleStrtod(trimmed, exponent, guess) ||
-      DiyFpStrtod(trimmed, exponent, guess)) {
-    return true;
-  }
-  if (*guess == Double::Infinity()) {
-    return true;
-  }
-  return false;
-}
-
-double Strtod(Vector<const char> buffer, int exponent) {
-  char copy_buffer[kMaxSignificantDecimalDigits];
-  Vector<const char> trimmed;
-  int updated_exponent;
-  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
-             &trimmed, &updated_exponent);
-  exponent = updated_exponent;
-
-  double guess;
-  bool is_correct = ComputeGuess(trimmed, exponent, &guess);
-  if (is_correct) return guess;
-
-  DiyFp upper_boundary = Double(guess).UpperBoundary();  // Decides between guess and the next double.
-  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
-  if (comparison < 0) {
-    return guess;
-  } else if (comparison > 0) {
-    return Double(guess).NextDouble();
-  } else if ((Double(guess).Significand() & 1) == 0) {
-    // Input equals the boundary: round towards even.
-    return guess;
-  } else {
-    return Double(guess).NextDouble();
-  }
-}
-
-float Strtof(Vector<const char> buffer, int exponent) {
-  char copy_buffer[kMaxSignificantDecimalDigits];
-  Vector<const char> trimmed;
-  int updated_exponent;
-  TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits,
-             &trimmed, &updated_exponent);
-  exponent = updated_exponent;
-
-  double double_guess;
-  bool is_correct = ComputeGuess(trimmed, exponent, &double_guess);
-
-  float float_guess = static_cast<float>(double_guess);
-  if (float_guess == double_guess) {
-    // This shortcut triggers for integer values.
-    return float_guess;
-  }
-
-  // We must catch double-rounding. Say the double has been rounded up, and is
-  // now a boundary of a float, and rounds up again. This is why we have to
-  // look at previous too.
-  // Example (in decimal numbers):
-  //    input: 12349
-  //    high-precision (4 digits): 1235
-  //    low-precision (3 digits):
-  //       when read from input: 123
-  //       when rounded from high precision: 124.
-  // To do this we simply look at the neighbors of the correct result and see
-  // if they would round to the same float. If the guess is not correct we have
-  // to look at four values (since two different doubles could be the correct
-  // double).
-
-  double double_next = Double(double_guess).NextDouble();
-  double double_previous = Double(double_guess).PreviousDouble();
-
-  float f1 = static_cast<float>(double_previous);
-#ifndef NDEBUG
-  float f2 = float_guess;
-#endif
-  float f3 = static_cast<float>(double_next);
-  float f4;
-  if (is_correct) {
-    f4 = f3;
-  } else {
-    double double_next2 = Double(double_next).NextDouble();
-    f4 = static_cast<float>(double_next2);
-  }
-#ifndef NDEBUG
-  ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4);
-#endif
-
-  // If the guess doesn't lie near a single-precision boundary we can simply
-  // return its float-value.
-  if (f1 == f4) {
-    return float_guess;
-  }
-
-  ASSERT((f1 != f2 && f2 == f3 && f3 == f4) ||
-         (f1 == f2 && f2 != f3 && f3 == f4) ||
-         (f1 == f2 && f2 == f3 && f3 != f4));
-
-  // guess and next are the two possible candidates (in the same way that
-  // double_guess was the lower candidate for a double-precision guess).
-  float guess = f1;
-  float next = f4;
-  DiyFp upper_boundary;
-  if (guess == 0.0f) {
-    float min_float = 1e-45f;
-    upper_boundary = Double(static_cast<double>(min_float) / 2).AsDiyFp();
-  } else {
-    upper_boundary = Single(guess).UpperBoundary();
-  }
-  int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary);
-  if (comparison < 0) {
-    return guess;
-  } else if (comparison > 0) {
-    return next;
-  } else if ((Single(guess).Significand() & 1) == 0) {
-    // Round towards even.
-    return guess;
-  } else {
-    return next;
-  }
-}
-
-}  // namespace double_conversion

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/strtod.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/strtod.h b/ext/kenlm/util/double-conversion/strtod.h
deleted file mode 100644
index ed0293b..0000000
--- a/ext/kenlm/util/double-conversion/strtod.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_STRTOD_H_
-#define DOUBLE_CONVERSION_STRTOD_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-// The buffer must only contain digits in the range [0-9]. It must not
-// contain a dot or a sign. It must not start with '0', and must not be empty.
-double Strtod(Vector<const char> buffer, int exponent);
-
-// The buffer must only contain digits in the range [0-9]. It must not
-// contain a dot or a sign. It must not start with '0', and must not be empty.
-float Strtof(Vector<const char> buffer, int exponent);
-
-}  // namespace double_conversion
-
-#endif  // DOUBLE_CONVERSION_STRTOD_H_

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/utils.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/utils.h b/ext/kenlm/util/double-conversion/utils.h
deleted file mode 100644
index 9ccb3b6..0000000
--- a/ext/kenlm/util/double-conversion/utils.h
+++ /dev/null
@@ -1,320 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_UTILS_H_
-#define DOUBLE_CONVERSION_UTILS_H_
-
-#include <stdlib.h>
-#include <string.h>
-
-#include <assert.h>
-#ifndef ASSERT
-#define ASSERT(condition)      (assert(condition))
-#endif
-#ifndef UNIMPLEMENTED
-#define UNIMPLEMENTED() (abort())
-#endif
-#ifndef UNREACHABLE
-#define UNREACHABLE()   (abort())
-#endif
-
-// Double operations detection based on target architecture.
-// Linux uses a 80bit wide floating point stack on x86. This induces double
-// rounding, which in turn leads to wrong results.
-// An easy way to test if the floating-point operations are correct is to
-// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then
-// the result is equal to 89255e-22.
-// The best way to test this, is to create a division-function and to compare
-// the output of the division with the expected result. (Inlining must be
-// disabled.)
-// On Linux,x86 89255e-22 != Div_double(89255.0/1e22)
-#if defined(_M_X64) || defined(__x86_64__) || \
-    defined(__ARMEL__) || defined(__avr32__) || \
-    defined(__hppa__) || defined(__ia64__) || \
-    defined(__mips__) || defined(__powerpc__) || \
-    defined(__sparc__) || defined(__sparc) || defined(__s390__) || \
-    defined(__SH4__) || defined(__alpha__) || \
-    defined(_MIPS_ARCH_MIPS32R2)
-#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
-#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
-#if defined(_WIN32)
-// Windows uses a 64bit wide floating point stack.
-#define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1
-#else
-#undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS
-#endif  // _WIN32
-#else
-#error Target architecture was not detected as supported by Double-Conversion.
-#endif
-
-
-#if defined(_WIN32) && !defined(__MINGW32__)
-
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef short int16_t;  // NOLINT
-typedef unsigned short uint16_t;  // NOLINT
-typedef int int32_t;
-typedef unsigned int uint32_t;
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-// intptr_t and friends are defined in crtdefs.h through stdio.h.
-
-#else
-
-#include <stdint.h>
-
-#endif
-
-// The following macro works on both 32 and 64-bit platforms.
-// Usage: instead of writing 0x1234567890123456
-//      write UINT64_2PART_C(0x12345678,90123456);
-#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u))
-
-
-// The expression ARRAY_SIZE(a) is a compile-time constant of type
-// size_t which represents the number of elements of the given
-// array. You should only use ARRAY_SIZE on statically allocated
-// arrays.
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(a)                                   \
-  ((sizeof(a) / sizeof(*(a))) /                         \
-  static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
-// A macro to disallow the evil copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#ifndef DISALLOW_COPY_AND_ASSIGN
-#define DISALLOW_COPY_AND_ASSIGN(TypeName)      \
-  TypeName(const TypeName&);                    \
-  void operator=(const TypeName&)
-#endif
-
-// A macro to disallow all the implicit constructors, namely the
-// default constructor, copy constructor and operator= functions.
-//
-// This should be used in the private: declarations for a class
-// that wants to prevent anyone from instantiating it. This is
-// especially useful for classes containing only static methods.
-#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS
-#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
-  TypeName();                                    \
-  DISALLOW_COPY_AND_ASSIGN(TypeName)
-#endif
-
-namespace double_conversion {
-
-static const int kCharSize = sizeof(char);
-
-// Returns the maximum of the two parameters.
-template <typename T>
-static T Max(T a, T b) {
-  return a < b ? b : a;
-}
-
-
-// Returns the minimum of the two parameters.
-template <typename T>
-static T Min(T a, T b) {
-  return a < b ? a : b;
-}
-
-
-inline int StrLength(const char* string) {
-  size_t length = strlen(string);
-  ASSERT(length == static_cast<size_t>(static_cast<int>(length)));
-  return static_cast<int>(length);
-}
-
-// This is a simplified version of V8's Vector class.
-template <typename T>
-class Vector {
- public:
-  Vector() : start_(NULL), length_(0) {}
-  Vector(T* data, int length) : start_(data), length_(length) {
-    ASSERT(length == 0 || (length > 0 && data != NULL));
-  }
-
-  // Returns a vector using the same backing storage as this one,
-  // spanning from and including 'from', to but not including 'to'.
-  Vector<T> SubVector(int from, int to) {
-    ASSERT(to <= length_);
-    ASSERT(from < to);
-    ASSERT(0 <= from);
-    return Vector<T>(start() + from, to - from);
-  }
-
-  // Returns the length of the vector.
-  int length() const { return length_; }
-
-  // Returns whether or not the vector is empty.
-  bool is_empty() const { return length_ == 0; }
-
-  // Returns the pointer to the start of the data in the vector.
-  T* start() const { return start_; }
-
-  // Access individual vector elements - checks bounds in debug mode.
-  T& operator[](int index) const {
-    ASSERT(0 <= index && index < length_);
-    return start_[index];
-  }
-
-  T& first() { return start_[0]; }
-
-  T& last() { return start_[length_ - 1]; }
-
- private:
-  T* start_;
-  int length_;
-};
-
-
-// Helper class for building result strings in a character buffer. The
-// purpose of the class is to use safe operations that checks the
-// buffer bounds on all operations in debug mode.
-class StringBuilder {
- public:
-  StringBuilder(char* buffer, int size)
-      : buffer_(buffer, size), position_(0) { }
-
-  ~StringBuilder() { if (!is_finalized()) Finalize(); }
-
-  int size() const { return buffer_.length(); }
-
-  // Get the current position in the builder.
-  int position() const {
-    ASSERT(!is_finalized());
-    return position_;
-  }
-
-  // Reset the position.
-  void Reset() { position_ = 0; }
-
-  // Add a single character to the builder. It is not allowed to add
-  // 0-characters; use the Finalize() method to terminate the string
-  // instead.
-  void AddCharacter(char c) {
-    // I just extract raw data not a cstr so null is fine.
-    //ASSERT(c != '\0');
-    ASSERT(!is_finalized() && position_ < buffer_.length());
-    buffer_[position_++] = c;
-  }
-
-  // Add an entire string to the builder. Uses strlen() internally to
-  // compute the length of the input string.
-  void AddString(const char* s) {
-    AddSubstring(s, StrLength(s));
-  }
-
-  // Add the first 'n' characters of the given string 's' to the
-  // builder. The input string must have enough characters.
-  void AddSubstring(const char* s, int n) {
-    ASSERT(!is_finalized() && position_ + n < buffer_.length());
-    // I just extract raw data not a cstr so null is fine.
-    //ASSERT(static_cast<size_t>(n) <= strlen(s));
-    memmove(&buffer_[position_], s, n * kCharSize);
-    position_ += n;
-  }
-
-
-  // Add character padding to the builder. If count is non-positive,
-  // nothing is added to the builder.
-  void AddPadding(char c, int count) {
-    for (int i = 0; i < count; i++) {
-      AddCharacter(c);
-    }
-  }
-
-  // Finalize the string by 0-terminating it and returning the buffer.
-  char* Finalize() {
-    ASSERT(!is_finalized() && position_ < buffer_.length());
-    buffer_[position_] = '\0';
-    // Make sure nobody managed to add a 0-character to the
-    // buffer while building the string.
-    // I just extract raw data not a cstr so null is fine.
-    //ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_));
-    position_ = -1;
-    ASSERT(is_finalized());
-    return buffer_.start();
-  }
-
- private:
-  Vector<char> buffer_;
-  int position_;
-
-  bool is_finalized() const { return position_ < 0; }
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder);
-};
-
-// The type-based aliasing rule allows the compiler to assume that pointers of
-// different types (for some definition of different) never alias each other.
-// Thus the following code does not work:
-//
-// float f = foo();
-// int fbits = *(int*)(&f);
-//
-// The compiler 'knows' that the int pointer can't refer to f since the types
-// don't match, so the compiler may cache f in a register, leaving random data
-// in fbits.  Using C++ style casts makes no difference, however a pointer to
-// char data is assumed to alias any other pointer.  This is the 'memcpy
-// exception'.
-//
-// Bit_cast uses the memcpy exception to move the bits from a variable of one
-// type of a variable of another type.  Of course the end result is likely to
-// be implementation dependent.  Most compilers (gcc-4.2 and MSVC 2005)
-// will completely optimize BitCast away.
-//
-// There is an additional use for BitCast.
-// Recent gccs will warn when they see casts that may result in breakage due to
-// the type-based aliasing rule.  If you have checked that there is no breakage
-// you can use BitCast to cast one pointer type to another.  This confuses gcc
-// enough that it can no longer see that you have cast one pointer type to
-// another thus avoiding the warning.
-template <class Dest, class Source>
-inline Dest BitCast(const Source& source) {
-  // Compile time assertion: sizeof(Dest) == sizeof(Source)
-  // A compile error here means your Dest and Source have different sizes.
-  typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]
-#if __GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 8
-      __attribute__((unused))
-#endif
-      ;
-
-  Dest dest;
-  memmove(&dest, &source, sizeof(dest));
-  return dest;
-}
-
-template <class Dest, class Source>
-inline Dest BitCast(Source* source) {
-  return BitCast<Dest>(reinterpret_cast<uintptr_t>(source));
-}
-
-}  // namespace double_conversion
-
-#endif  // DOUBLE_CONVERSION_UTILS_H_

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/ersatz_progress.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/ersatz_progress.cc b/ext/kenlm/util/ersatz_progress.cc
deleted file mode 100644
index 55c82e7..0000000
--- a/ext/kenlm/util/ersatz_progress.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-#include "util/ersatz_progress.hh"
-
-#include <algorithm>
-#include <ostream>
-#include <limits>
-#include <string>
-
-namespace util {
-
-namespace { const unsigned char kWidth = 100; }
-
-const char kProgressBanner[] = "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
-
-ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
-
-ErsatzProgress::~ErsatzProgress() {
-  if (out_) Finished();
-}
-
-ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
-  : current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
-  if (!out_) {
-    next_ = std::numeric_limits<uint64_t>::max();
-    return;
-  }
-  if (!message.empty()) *out_ << message << '\n';
-  *out_ << kProgressBanner;
-}
-
-void ErsatzProgress::Milestone() {
-  if (!out_) { current_ = 0; return; }
-  if (!complete_) return;
-  unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
-
-  for (; stones_written_ < stone; ++stones_written_) {
-    (*out_) << '*';
-  }
-  if (stone == kWidth) {
-    (*out_) << std::endl;
-    next_ = std::numeric_limits<uint64_t>::max();
-    out_ = NULL;
-  } else {
-    next_ = std::max(next_, ((stone + 1) * complete_ + kWidth - 1) / kWidth);
-  }
-}
-
-} // namespace util

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/ersatz_progress.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/ersatz_progress.hh b/ext/kenlm/util/ersatz_progress.hh
deleted file mode 100644
index b47aded..0000000
--- a/ext/kenlm/util/ersatz_progress.hh
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef UTIL_ERSATZ_PROGRESS_H
-#define UTIL_ERSATZ_PROGRESS_H
-
-#include <iostream>
-#include <string>
-#include <stdint.h>
-
-// Ersatz version of boost::progress so core language model doesn't depend on
-// boost.  Also adds option to print nothing.
-
-namespace util {
-
-extern const char kProgressBanner[];
-
-class ErsatzProgress {
-  public:
-    // No output.
-    ErsatzProgress();
-
-    // Null means no output.  The null value is useful for passing along the ostream pointer from another caller.
-    explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
-
-    ~ErsatzProgress();
-
-    ErsatzProgress &operator++() {
-      if (++current_ >= next_) Milestone();
-      return *this;
-    }
-
-    ErsatzProgress &operator+=(uint64_t amount) {
-      if ((current_ += amount) >= next_) Milestone();
-      return *this;
-    }
-
-    void Set(uint64_t to) {
-      if ((current_ = to) >= next_) Milestone();
-    }
-
-    void Finished() {
-      Set(complete_);
-    }
-
-  private:
-    void Milestone();
-
-    uint64_t current_, next_, complete_;
-    unsigned char stones_written_;
-    std::ostream *out_;
-
-    // noncopyable
-    ErsatzProgress(const ErsatzProgress &other);
-    ErsatzProgress &operator=(const ErsatzProgress &other);
-};
-
-} // namespace util
-
-#endif // UTIL_ERSATZ_PROGRESS_H

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/exception.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/exception.cc b/ext/kenlm/util/exception.cc
deleted file mode 100644
index e644d2c..0000000
--- a/ext/kenlm/util/exception.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-#include "util/exception.hh"
-
-#ifdef __GXX_RTTI
-#include <typeinfo>
-#endif
-
-#include <cerrno>
-#include <cstring>
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#include <io.h>
-#endif
-
-namespace util {
-
-Exception::Exception() throw() {}
-Exception::~Exception() throw() {}
-
-void Exception::SetLocation(const char *file, unsigned int line, const char *func, const char *child_name, const char *condition) {
-  /* The child class might have set some text, but we want this to come first.
-   * Another option would be passing this information to the constructor, but
-   * then child classes would have to accept constructor arguments and pass
-   * them down.
-   */
-  std::string old_text;
-  std::swap(old_text, what_);
-  StringStream stream(what_);
-  stream << file << ':' << line;
-  if (func) stream << " in " << func << " threw ";
-  if (child_name) {
-    stream << child_name;
-  } else {
-#ifdef __GXX_RTTI
-    stream << typeid(this).name();
-#else
-    stream << "an exception";
-#endif
-  }
-  if (condition) {
-    stream << " because `" << condition << '\'';
-  }
-  stream << ".\n";
-  stream << old_text;
-}
-
-namespace {
-
-#ifdef __GNUC__
-const char *HandleStrerror(int ret, const char *buf) __attribute__ ((unused));
-const char *HandleStrerror(const char *ret, const char * /*buf*/) __attribute__ ((unused));
-#endif
-// At least one of these functions will not be called.
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-function"
-#endif
-// The XOPEN version.
-const char *HandleStrerror(int ret, const char *buf) {
-  if (!ret) return buf;
-  return NULL;
-}
-
-// The GNU version.
-const char *HandleStrerror(const char *ret, const char * /*buf*/) {
-  return ret;
-}
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
-} // namespace
-
-ErrnoException::ErrnoException() throw() : errno_(errno) {
-  char buf[200];
-  buf[0] = 0;
-#if defined(sun) || defined(_WIN32) || defined(_WIN64)
-  const char *add = strerror(errno);
-#else
-  const char *add = HandleStrerror(strerror_r(errno, buf, 200), buf);
-#endif
-
-  if (add) {
-    *this << add << ' ';
-  }
-}
-
-ErrnoException::~ErrnoException() throw() {}
-
-OverflowException::OverflowException() throw() {}
-OverflowException::~OverflowException() throw() {}
-
-#if defined(_WIN32) || defined(_WIN64)
-WindowsException::WindowsException() throw() {
-  unsigned int last_error = GetLastError();
-  char error_msg[256] = "";
-  if (!FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error, LANG_NEUTRAL, error_msg, sizeof(error_msg), NULL)) {
-    *this << "Windows error " << GetLastError() << " while formatting Windows error " << last_error << ". ";
-  } else {
-    *this << "Windows error " << last_error << ": " << error_msg;
-  }
-}
-WindowsException::~WindowsException() throw() {}
-#endif
-
-} // namespace util

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/exception.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/exception.hh b/ext/kenlm/util/exception.hh
deleted file mode 100644
index 57d803d..0000000
--- a/ext/kenlm/util/exception.hh
+++ /dev/null
@@ -1,159 +0,0 @@
-#ifndef UTIL_EXCEPTION_H
-#define UTIL_EXCEPTION_H
-
-#include "util/string_stream.hh"
-
-#include <exception>
-#include <limits>
-#include <string>
-#include <stdint.h>
-
-namespace util {
-
-template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
-
-class Exception : public std::exception {
-  public:
-    Exception() throw();
-    virtual ~Exception() throw();
-
-    const char *what() const throw() { return what_.c_str(); }
-
-    // For use by the UTIL_THROW macros.
-    void SetLocation(
-        const char *file,
-        unsigned int line,
-        const char *func,
-        const char *child_name,
-        const char *condition);
-
-  private:
-    template <class Except, class Data> friend typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
-
-    // This helps restrict operator<< defined below.
-    template <class T> struct ExceptionTag {
-      typedef T Identity;
-    };
-
-    std::string what_;
-};
-
-/* This implements the normal operator<< for Exception and all its children.
- * SFINAE means it only applies to Exception.  Think of this as an ersatz
- * boost::enable_if.
- */
-template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data) {
-  StringStream(e.what_) << data;
-  return e;
-}
-
-#ifdef __GNUC__
-#define UTIL_FUNC_NAME __PRETTY_FUNCTION__
-#else
-#ifdef _WIN32
-#define UTIL_FUNC_NAME __FUNCTION__
-#else
-#define UTIL_FUNC_NAME NULL
-#endif
-#endif
-
-/* Create an instance of Exception, add the message Modify, and throw it.
- * Modify is appended to the what() message and can contain << for ostream
- * operations.
- *
- * do .. while kludge to swallow trailing ; character
- * http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html .
- * Arg can be a constructor argument to the exception.
- */
-#define UTIL_THROW_BACKEND(Condition, Exception, Arg, Modify) do { \
-  Exception UTIL_e Arg; \
-  UTIL_e.SetLocation(__FILE__, __LINE__, UTIL_FUNC_NAME, #Exception, Condition); \
-  UTIL_e << Modify; \
-  throw UTIL_e; \
-} while (0)
-
-#define UTIL_THROW_ARG(Exception, Arg, Modify) \
-  UTIL_THROW_BACKEND(NULL, Exception, Arg, Modify)
-
-#define UTIL_THROW(Exception, Modify) \
-  UTIL_THROW_BACKEND(NULL, Exception, , Modify);
-
-#define UTIL_THROW2(Modify) \
-  UTIL_THROW_BACKEND(NULL, util::Exception, , Modify);
-
-#if __GNUC__ >= 3
-#define UTIL_UNLIKELY(x) __builtin_expect (!!(x), 0)
-#else
-#define UTIL_UNLIKELY(x) (x)
-#endif
-
-#if __GNUC__ >= 3
-#define UTIL_LIKELY(x) __builtin_expect (!!(x), 1)
-#else
-#define UTIL_LIKELY(x) (x)
-#endif
-
-#define UTIL_THROW_IF_ARG(Condition, Exception, Arg, Modify) do { \
-  if (UTIL_UNLIKELY(Condition)) { \
-    UTIL_THROW_BACKEND(#Condition, Exception, Arg, Modify); \
-  } \
-} while (0)
-
-#define UTIL_THROW_IF(Condition, Exception, Modify) \
-  UTIL_THROW_IF_ARG(Condition, Exception, , Modify)
-
-#define UTIL_THROW_IF2(Condition, Modify) \
-  UTIL_THROW_IF_ARG(Condition, util::Exception, , Modify)
-
-// Exception that records errno and adds it to the message.
-class ErrnoException : public Exception {
-  public:
-    ErrnoException() throw();
-
-    virtual ~ErrnoException() throw();
-
-    int Error() const throw() { return errno_; }
-
-  private:
-    int errno_;
-};
-
-// file wasn't there, or couldn't be open for some reason
-class FileOpenException : public Exception {
-  public:
-	FileOpenException() throw() {}
-    ~FileOpenException() throw() {}
-};
-
-// Utilities for overflow checking.
-class OverflowException : public Exception {
-  public:
-    OverflowException() throw();
-    ~OverflowException() throw();
-};
-
-template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
-  UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected.  This model is too big for 32-bit code.");
-  return value;
-}
-
-template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
-  return value;
-}
-
-inline std::size_t CheckOverflow(uint64_t value) {
-  return CheckOverflowInternal<sizeof(std::size_t)>(value);
-}
-
-#if defined(_WIN32) || defined(_WIN64)
-/* Thrown for Windows specific operations. */
-class WindowsException : public Exception {
-  public:
-    WindowsException() throw();
-    ~WindowsException() throw();
-};
-#endif
-
-} // namespace util
-
-#endif // UTIL_EXCEPTION_H

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/fake_ostream.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/fake_ostream.hh b/ext/kenlm/util/fake_ostream.hh
deleted file mode 100644
index 2f76053..0000000
--- a/ext/kenlm/util/fake_ostream.hh
+++ /dev/null
@@ -1,111 +0,0 @@
-#ifndef UTIL_FAKE_OSTREAM_H
-#define UTIL_FAKE_OSTREAM_H
-
-#include "util/float_to_string.hh"
-#include "util/integer_to_string.hh"
-#include "util/string_piece.hh"
-
-#include <cassert>
-#include <limits>
-
-#include <stdint.h>
-
-namespace util {
-
-/* Like std::ostream but without being incredibly slow.
- * Supports most of the built-in types except for long double.
- * 
- * The FakeOStream class is intended to be inherited from.  The inherting class
- * should provide:
- * public:
- *   Derived &flush();
- *   Derived &write(const void *data, std::size_t length);
- * 
- * private: or protected:
- *   friend class FakeOStream;
- *   char *Ensure(std::size_t amount);
- *   void AdvanceTo(char *to);
- *
- * The Ensure function makes enough space for an in-place write and returns
- * where to write.  The AdvanceTo function happens after the write, saying how
- * much was actually written.
- * 
- * Precondition:
- * amount <= kToStringMaxBytes for in-place writes.
- */
-template <class Derived> class FakeOStream {
-  public:
-    FakeOStream() {}
-
-    // This also covers std::string and char*
-    Derived &operator<<(StringPiece str) {
-      return C().write(str.data(), str.size());
-    }
-
-    // Handle integers by size and signedness.
-  private:
-    template <class Arg> struct EnableIfKludge {
-      typedef Derived type;
-    };
-    template <class From, unsigned Length = sizeof(From), bool Signed = std::numeric_limits<From>::is_signed, bool IsInteger = std::numeric_limits<From>::is_integer> struct Coerce {};
-
-    template <class From> struct Coerce<From, 2, false, true> { typedef uint16_t To; };
-    template <class From> struct Coerce<From, 4, false, true> { typedef uint32_t To; };
-    template <class From> struct Coerce<From, 8, false, true> { typedef uint64_t To; };
-
-    template <class From> struct Coerce<From, 2, true, true> { typedef int16_t To; };
-    template <class From> struct Coerce<From, 4, true, true> { typedef int32_t To; };
-    template <class From> struct Coerce<From, 8, true, true> { typedef int64_t To; };
-  public:
-    template <class From> typename EnableIfKludge<typename Coerce<From>::To>::type &operator<<(const From value) {
-      return CallToString(static_cast<typename Coerce<From>::To>(value));
-    }
-
-    // Character types that get copied as bytes instead of displayed as integers.
-    Derived &operator<<(char val) { return put(val); }
-    Derived &operator<<(signed char val) { return put(static_cast<char>(val)); }
-    Derived &operator<<(unsigned char val) { return put(static_cast<char>(val)); }
-
-    Derived &operator<<(bool val) { return put(val + '0'); }
-    // enums will fall back to int but are not caught by the template.
-    Derived &operator<<(int val) { return CallToString(static_cast<typename Coerce<int>::To>(val)); }
-
-    Derived &operator<<(float val) { return CallToString(val); }
-    Derived &operator<<(double val) { return CallToString(val); }
-
-    // This is here to catch all the other pointer types.
-    Derived &operator<<(const void *value) { return CallToString(value); }
-    // This is here because the above line also catches const char*.
-    Derived &operator<<(const char *value) { return *this << StringPiece(value); }
-    Derived &operator<<(char *value) { return *this << StringPiece(value); }
-
-    Derived &put(char val) {
-      char *c = C().Ensure(1);
-      *c = val;
-      C().AdvanceTo(++c);
-      return C();
-    }
-
-    char widen(char val) const { return val; }
-
-  private:
-    // References to derived class for convenience.
-    Derived &C() {
-      return *static_cast<Derived*>(this);
-    }
-
-    const Derived &C() const {
-      return *static_cast<const Derived*>(this);
-    }
-
-    // This is separate to prevent an infinite loop if the compiler considers
-    // types the same (i.e. gcc std::size_t and uint64_t or uint32_t).
-    template <class T> Derived &CallToString(const T value) {
-      C().AdvanceTo(ToString(value, C().Ensure(ToStringBuf<T>::kBytes)));
-      return C();
-    }
-};
-
-} // namespace
-
-#endif // UTIL_FAKE_OSTREAM_H

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/file.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/file.cc b/ext/kenlm/util/file.cc
deleted file mode 100644
index e8976bc..0000000
--- a/ext/kenlm/util/file.cc
+++ /dev/null
@@ -1,574 +0,0 @@
-#define _LARGEFILE64_SOURCE
-#define _FILE_OFFSET_BITS 64
-
-#include "util/file.hh"
-
-#include "util/exception.hh"
-
-#include <algorithm>
-#include <cstdlib>
-#include <cstdio>
-#include <iostream>
-#include <limits>
-#include <sstream>
-
-
-#include <cassert>
-#include <cerrno>
-#include <climits>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <stdint.h>
-
-#if defined(__MINGW32__)
-#include <windows.h>
-#include <unistd.h>
-#warning "The file functions on MinGW have not been tested for file sizes above 2^31 - 1.  Please read https://stackoverflow.com/questions/12539488/determine-64-bit-file-size-in-c-on-mingw-32-bit and fix"
-#elif defined(_WIN32) || defined(_WIN64)
-#include <windows.h>
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
-namespace util {
-
-// Close the held descriptor, if any.  A failing close() is reported on
-// stderr and aborts the process.
-scoped_fd::~scoped_fd() {
-  if (fd_ == -1) return;
-  if (close(fd_) != 0) {
-    std::cerr << "Could not close file " << fd_ << std::endl;
-    std::abort();
-  }
-}
-
-// Close a stdio stream; NULL is ignored.  A failing fclose() is reported on
-// stderr and aborts the process.
-void scoped_FILE_closer::Close(std::FILE *file) {
-  if (!file) return;
-  if (std::fclose(file) != 0) {
-    std::cerr << "Could not close file " << file << std::endl;
-    std::abort();
-  }
-}
-
-// Note that ErrnoException records errno before NameFromFD is called.
-// Remembers the descriptor and a best-effort name for it, and starts the
-// exception message with "in <name> ".
-FDException::FDException(int fd) throw() : fd_(fd), name_guess_(NameFromFD(fd)) {
-  *this << "in " << name_guess_ << ' ';
-}
-
-// Nothing to release explicitly.
-FDException::~FDException() throw() {}
-
-// Starts the exception message with "End of file"; throw sites append detail.
-EndOfFileException::EndOfFileException() throw() {
-  *this << "End of file";
-}
-// Nothing to release explicitly.
-EndOfFileException::~EndOfFileException() throw() {}
-
-// Does the path denote standard input, i.e. is it "-" or "/dev/stdin"?
-// util/file.hh declares this predicate as InputPathIsStdin, so that is the
-// name defined here; otherwise callers of the header cannot link.
-bool InputPathIsStdin(StringPiece path) {
-  return path == "-" || path == "/dev/stdin";
-}
-
-// Earlier spelling of the same predicate, kept so existing references to it
-// keep working.  Forwards to InputPathIsStdin.
-bool InputFileIsStdin(StringPiece path) {
-  return InputPathIsStdin(path);
-}
-
-// Does the path denote standard output, i.e. is it "-" or "/dev/stdout"?
-// util/file.hh declares this predicate as OutputPathIsStdout, so that is the
-// name defined here; otherwise callers of the header cannot link.
-bool OutputPathIsStdout(StringPiece path) {
-  return path == "-" || path == "/dev/stdout";
-}
-
-// Earlier spelling of the same predicate, kept so existing references to it
-// keep working.  Forwards to OutputPathIsStdout.
-bool OutputFileIsStdout(StringPiece path) {
-  return OutputPathIsStdout(path);
-}
-
-// Open an existing file read-only (binary mode on Windows) and return its
-// descriptor.  Throws ErrnoException when the open fails.
-int OpenReadOrThrow(const char *name) {
-#if defined(_WIN32) || defined(_WIN64)
-  const int fd = _open(name, _O_BINARY | _O_RDONLY);
-#else
-  const int fd = open(name, O_RDONLY);
-#endif
-  UTIL_THROW_IF(-1 == fd, ErrnoException, "while opening " << name);
-  return fd;
-}
-
-// Create the file (truncating it if it already exists), open it for reading
-// and writing, and return the descriptor.  Throws ErrnoException on failure.
-int CreateOrThrow(const char *name) {
-#if defined(_WIN32) || defined(_WIN64)
-  const int fd = _open(name, _O_CREAT | _O_TRUNC | _O_RDWR | _O_BINARY, _S_IREAD | _S_IWRITE);
-#else
-  const int fd = open(name, O_CREAT | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
-#endif
-  UTIL_THROW_IF(-1 == fd, ErrnoException, "while creating " << name);
-  return fd;
-}
-
-// Return the size in bytes of the file behind fd, or kBadSize when it cannot
-// be determined.  The fstat-based branches also report kBadSize for a
-// zero-sized descriptor that is not a regular file.
-uint64_t SizeFile(int fd) {
-#if defined __MINGW32__
-  struct stat sb;
-  // Does this handle 64-bit?
-  int ret = fstat(fd, &sb);
-  if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
-  return sb.st_size;
-#elif defined(_WIN32) || defined(_WIN64)
-  // _filelengthi64 reports failure as -1.
-  __int64 ret = _filelengthi64(fd);
-  return (ret == -1) ? kBadSize : ret;
-#else // Not windows.
-
-#ifdef OS_ANDROID
-  // Android uses the explicit 64-bit stat variant.
-  struct stat64 sb;
-  int ret = fstat64(fd, &sb);
-#else
-  struct stat sb;
-  int ret = fstat(fd, &sb);
-#endif
-  if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
-  return sb.st_size;
-#endif
-}
-
-// Like SizeFile, but throws FDException instead of returning kBadSize.
-uint64_t SizeOrThrow(int fd) {
-  const uint64_t size = SizeFile(fd);
-  UTIL_THROW_IF_ARG(kBadSize == size, FDException, (fd), "Failed to size");
-  return size;
-}
-
-// Truncate or extend the file behind fd to exactly `to` bytes.  Throws
-// FDException when the platform call returns non-zero.
-void ResizeOrThrow(int fd, uint64_t to) {
-// The preprocessor picks the result type and function name; the shared
-// argument list "(fd, to)" follows the #endif.
-#if defined __MINGW32__
-    // Does this handle 64-bit?
-    int ret = ftruncate
-#elif defined(_WIN32) || defined(_WIN64)
-    errno_t ret = _chsize_s
-#elif defined(OS_ANDROID)
-    int ret = ftruncate64
-#else
-    int ret = ftruncate
-#endif
-    (fd, to);
-  UTIL_THROW_IF_ARG(ret, FDException, (fd), "while resizing to " << to << " bytes");
-}
-
-namespace {
-// Clamp a transfer size to what a single read/write/pread/pwrite call can
-// take on this platform.
-std::size_t GuardLarge(std::size_t size) {
-  // The following operating systems have broken read/write/pread/pwrite that
-  // only supports up to 2^31.
-  // OS X man pages claim to support 64-bit, but Kareem M. Darwish had problems
-  // building with larger files, so APPLE is also here.
-#if defined(_WIN32) || defined(_WIN64) || defined(__APPLE__) || defined(OS_ANDROID) || defined(__MINGW32__)
-  const std::size_t limit = INT_MAX;
-  return std::min(size, limit);
-#else
-  return size;
-#endif
-}
-}
-
-#if defined(_WIN32) || defined(_WIN64)
-namespace {
-// Largest value a 32-bit DWORD can hold (2^32 - 1); the Windows read/write
-// paths cap their per-call byte counts at this.
-const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
-} // namespace
-#endif
-
-// Perform a single read of at most `amount` bytes from fd into `to` and
-// return the number of bytes actually read (0 means end of file).  On POSIX
-// the read is retried when interrupted by a signal (EINTR).  Throws
-// FDException (POSIX) or WindowsException (Windows) on failure.
-std::size_t PartialRead(int fd, void *to, std::size_t amount) {
-#if defined(_WIN32) || defined(_WIN64)
-    DWORD ret;
-    HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
-    DWORD larger_size = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, amount));
-    // Received reports that 31346 worked but higher values did not. This rounds down to the nearest multiple of 4096, the page size.
-    // The retry must never ask for more than the first attempt did, or it
-    // could write past the end of the caller's buffer.
-    DWORD smaller_size = std::min<DWORD>(larger_size, 28672);
-    if (!ReadFile(file_handle, to, larger_size, &ret, NULL))
-    {
-        DWORD last_error = GetLastError();
-        // Only an out-of-memory failure is retried, with the smaller request.
-        if (last_error != ERROR_NOT_ENOUGH_MEMORY || !ReadFile(file_handle, to, smaller_size, &ret, NULL)) {
-            UTIL_THROW(WindowsException, "Windows error in ReadFile.");
-        }
-    }
-#else
-  errno = 0;
-  ssize_t ret;
-  do {
-    ret = read(fd, to, GuardLarge(amount));
-  } while (ret == -1 && errno == EINTR);
-  UTIL_THROW_IF_ARG(ret < 0, FDException, (fd), "while reading " << amount << " bytes");
-#endif
-  return static_cast<std::size_t>(ret);
-}
-
-// Read exactly `amount` bytes from fd into to_void, looping over partial
-// reads.  Throws EndOfFileException if the file ends first.
-void ReadOrThrow(int fd, void *to_void, std::size_t amount) {
-  uint8_t *cursor = static_cast<uint8_t*>(to_void);
-  for (std::size_t got; amount != 0; amount -= got, cursor += got) {
-    got = PartialRead(fd, cursor, amount);
-    UTIL_THROW_IF(got == 0, EndOfFileException, " in " << NameFromFD(fd) << " but there should be " << amount << " more bytes to read.");
-  }
-}
-
-// Read up to `amount` bytes from fd into to_void, looping over partial
-// reads, and stop early at end of file.  Returns the number of bytes read.
-std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
-  uint8_t *const begin = static_cast<uint8_t*>(to_void);
-  std::size_t done = 0;
-  while (done != amount) {
-    const std::size_t got = PartialRead(fd, begin + done, amount - done);
-    if (got == 0) break;
-    done += got;
-  }
-  return done;
-}
-
-// Write all `size` bytes from data_void to fd, looping over partial writes
-// and retrying writes interrupted by a signal (EINTR).  Throws FDException
-// when a write fails or makes no progress.
-void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
-  const uint8_t *data = static_cast<const uint8_t*>(data_void);
-  while (size) {
-#if defined(_WIN32) || defined(_WIN64)
-    int ret;
-#else
-    ssize_t ret;
-#endif
-    errno = 0;
-    do {
-      // The preprocessor selects the function name; the shared argument list
-      // follows the #endif.
-      ret =
-#if defined(_WIN32) || defined(_WIN64)
-        _write
-#else
-        write
-#endif
-        (fd, data, GuardLarge(size));
-    } while (ret == -1 && errno == EINTR);
-    // A return of 0 is treated as an error too, so the loop cannot spin forever.
-    UTIL_THROW_IF_ARG(ret < 1, FDException, (fd), "while writing " << size << " bytes");
-    data += ret;
-    size -= ret;
-  }
-}
-
-// Write `size` bytes to a stdio stream as one fwrite item.  A zero-length
-// request is a no-op.  Throws ErrnoException if the item was not written.
-void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
-  if (size == 0) return;
-  const std::size_t items_written = std::fwrite(data, size, 1, to);
-  UTIL_THROW_IF(items_written != 1, ErrnoException, "Short write; requested size " << size);
-}
-
-// Read exactly `size` bytes starting at byte offset `off` of fd into
-// to_void, looping until everything has arrived.  Throws EndOfFileException
-// (POSIX, when the file ends early), FDException (POSIX, on other errors) or
-// WindowsException (Windows, when ReadFile fails).
-void ErsatzPRead(int fd, void *to_void, std::size_t size, uint64_t off) {
-  uint8_t *to = static_cast<uint8_t*>(to_void);
-  while (size) {
-#if defined(_WIN32) || defined(_WIN64)
-    /* BROKEN: changes file pointer.  Even if you save it and change it back, it won't be safe to use concurrently with write() or read() which lmplz does. */
-    // size_t might be 64-bit.  DWORD is always 32.
-    DWORD reading = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
-    DWORD ret;
-    // The 64-bit offset is passed to ReadFile split into two 32-bit halves.
-    OVERLAPPED overlapped;
-    memset(&overlapped, 0, sizeof(OVERLAPPED));
-    overlapped.Offset = static_cast<DWORD>(off);
-    overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
-    UTIL_THROW_IF(!ReadFile((HANDLE)_get_osfhandle(fd), to, reading, &ret, &overlapped), WindowsException, "ReadFile failed for offset " << off);
-#else
-    ssize_t ret;
-    errno = 0;
-    // The preprocessor selects the function name; the shared argument list
-    // follows the #endif.
-    ret =
-#ifdef OS_ANDROID
-      pread64
-#else
-      pread
-#endif
-      (fd, to, GuardLarge(size), off);
-    if (ret <= 0) {
-      // Interrupted by a signal: retry the same request.
-      if (ret == -1 && errno == EINTR) continue;
-      UTIL_THROW_IF(ret == 0, EndOfFileException, " for reading " << size << " bytes at " << off << " from " << NameFromFD(fd));
-      UTIL_THROW_ARG(FDException, (fd), "while reading " << size << " bytes at offset " << off);
-    }
-#endif
-    // Advance past the bytes just transferred.
-    size -= ret;
-    off += ret;
-    to += ret;
-  }
-}
-
-// Write exactly `size` bytes from from_void to fd starting at byte offset
-// `off`, looping until everything has been written.  Throws
-// EndOfFileException (POSIX, when a write returns 0), FDException (POSIX, on
-// other errors) or Exception (Windows, when WriteFile fails).
-// NOTE(review): the Windows branch throws plain Exception while ErsatzPRead
-// throws WindowsException -- confirm whether that difference is intended.
-void ErsatzPWrite(int fd, const void *from_void, std::size_t size, uint64_t off) {
-  const uint8_t *from = static_cast<const uint8_t*>(from_void);
-  while(size) {
-#if defined(_WIN32) || defined(_WIN64)
-    /* Changes file pointer.  Even if you save it and change it back, it won't be safe to use concurrently with write() or read() */
-    // size_t might be 64-bit.  DWORD is always 32.
-    DWORD writing = static_cast<DWORD>(std::min<std::size_t>(kMaxDWORD, size));
-    DWORD ret;
-    // The 64-bit offset is passed to WriteFile split into two 32-bit halves.
-    OVERLAPPED overlapped;
-    memset(&overlapped, 0, sizeof(OVERLAPPED));
-    overlapped.Offset = static_cast<DWORD>(off);
-    overlapped.OffsetHigh = static_cast<DWORD>(off >> 32);
-    UTIL_THROW_IF(!WriteFile((HANDLE)_get_osfhandle(fd), from, writing, &ret, &overlapped), Exception, "WriteFile failed for offset " << off);
-#else
-    ssize_t ret;
-    errno = 0;
-    // The preprocessor selects the function name; the shared argument list
-    // follows the #endif.
-    ret =
-#ifdef OS_ANDROID
-      pwrite64
-#else
-      pwrite
-#endif
-      (fd, from, GuardLarge(size), off);
-    if (ret <= 0) {
-      // Interrupted by a signal: retry the same request.
-      if (ret == -1 && errno == EINTR) continue;
-      UTIL_THROW_IF(ret == 0, EndOfFileException, " for writing " << size << " bytes at " << off << " from " << NameFromFD(fd));
-      UTIL_THROW_ARG(FDException, (fd), "while writing " << size << " bytes at offset " << off);
-    }
-#endif
-    // Advance past the bytes just transferred.
-    size -= ret;
-    off += ret;
-    from += ret;
-  }
-}
-
-
-// Call fsync on fd and throw FDException if it fails.  On Windows this
-// function does nothing.
-void FSyncOrThrow(int fd) {
-// Apparently windows doesn't have fsync?
-#if !defined(_WIN32) && !defined(_WIN64)
-  UTIL_THROW_IF_ARG(-1 == fsync(fd), FDException, (fd), "while syncing");
-#endif
-}
-
-namespace {
-
-// Static assert for 64-bit off_t size.
-// CheckOffT is only specialized for 8, and only that specialization has a
-// nested True type, so the typedef below compiles only when
-// sizeof(off_t) == 8.  Skipped on Windows and Android, which use explicitly
-// 64-bit calls instead.
-#if !defined(_WIN32) && !defined(_WIN64) && !defined(OS_ANDROID)
-template <unsigned> struct CheckOffT;
-template <> struct CheckOffT<8> {
-  struct True {};
-};
-// If there's a compiler error on the next line, then off_t isn't 64 bit.  And
-// that makes me a sad panda.
-typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
-#endif
-
-// Can't we all just get along?
-// Move the file pointer of fd to `off` relative to `whence` (SEEK_SET,
-// SEEK_CUR or SEEK_END) using whichever seek call the platform provides.
-// Throws FDException when that call reports failure (returns -1).
-void InternalSeek(int fd, int64_t off, int whence) {
-  if (
-#if defined __MINGW32__
-    // Does this handle 64-bit?
-    (off_t)-1 == lseek(fd, off, whence)
-#elif defined(_WIN32) || defined(_WIN64)
-    (__int64)-1 == _lseeki64(fd, off, whence)
-#elif defined(OS_ANDROID)
-    (off64_t)-1 == lseek64(fd, off, whence)
-#else
-    (off_t)-1 == lseek(fd, off, whence)
-#endif
-  ) UTIL_THROW_ARG(FDException, (fd), "while seeking to " << off << " whence " << whence);
-}
-} // namespace
-
-// Seek to absolute offset `off` from the start of the file.  The unsigned
-// offset is converted to the int64_t that InternalSeek takes.
-void SeekOrThrow(int fd, uint64_t off) {
-  InternalSeek(fd, off, SEEK_SET);
-}
-
-// Move the file pointer by `off` bytes relative to its current position;
-// `off` is signed, so it may be negative.
-void AdvanceOrThrow(int fd, int64_t off) {
-  InternalSeek(fd, off, SEEK_CUR);
-}
-
-// Seek to the end of the file.
-void SeekEnd(int fd) {
-  InternalSeek(fd, 0, SEEK_END);
-}
-
-// Wrap the descriptor in a read/write ("r+b") stdio stream.  Ownership
-// moves from `file` to the returned FILE* only once fdopen has succeeded;
-// on failure FDException is thrown and `file` still owns the descriptor.
-std::FILE *FDOpenOrThrow(scoped_fd &file) {
-  const int fd = file.get();
-  std::FILE *const stream = fdopen(fd, "r+b");
-  UTIL_THROW_IF_ARG(!stream, FDException, (fd), "Could not fdopen for write");
-  file.release();
-  return stream;
-}
-
-// Wrap the descriptor in a read-only ("rb") stdio stream.  Ownership moves
-// from `file` to the returned FILE* only once fdopen has succeeded; on
-// failure FDException is thrown and `file` still owns the descriptor.
-std::FILE *FDOpenReadOrThrow(scoped_fd &file) {
-  const int fd = file.get();
-  std::FILE *const stream = fdopen(fd, "rb");
-  UTIL_THROW_IF_ARG(!stream, FDException, (fd), "Could not fdopen for read");
-  file.release();
-  return stream;
-}
-
-// Sigh.  Windows temporary file creation is full of race conditions.
-#if defined(_WIN32) || defined(_WIN64)
-/* mkstemp extracted from libc/sysdeps/posix/tempname.c.  Copyright
-   (C) 1991-1999, 2000, 2001, 2006 Free Software Foundation, Inc.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.  */
-
-/* This has been modified from the original version to rename the function and
- * set the Windows temporary flag. */
-
-static const char letters[] =
-"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
-
-/* Generate a temporary file name based on TMPL.  TMPL must match the
-   rules for mk[s]temp (i.e. end in "XXXXXX").  The name constructed
-   does not exist at the time of the call to mkstemp.  TMPL is
-   overwritten with the result.  */
-// Windows variant.  Replaces the trailing "XXXXXX" of tmpl with generated
-// characters and opens the resulting name exclusively with _O_TEMPORARY.
-// Returns the descriptor, or -1 with errno set on failure.
-int
-mkstemp_and_unlink(char *tmpl)
-{
-  int len;
-  char *XXXXXX;
-  // Static, so every call continues from where the previous one stopped.
-  static unsigned long long value;
-  unsigned long long random_time_bits;
-  unsigned int count;
-  int fd = -1;
-  // Restored on success so a successful call leaves errno untouched.
-  int save_errno = errno;
-
-  /* A lower bound on the number of temporary files to attempt to
-     generate.  The maximum total number of temporary file names that
-     can exist for a given template is 62**6.  It should never be
-     necessary to try all these combinations.  Instead if a reasonable
-     number of names is tried (we define reasonable as 62**3) fail to
-     give the system administrator the chance to remove the problems.  */
-#define ATTEMPTS_MIN (62 * 62 * 62)
-
-  /* The number of times to attempt to generate a temporary file.  To
-     conform to POSIX, this must be no smaller than TMP_MAX.  */
-#if ATTEMPTS_MIN < TMP_MAX
-  unsigned int attempts = TMP_MAX;
-#else
-  unsigned int attempts = ATTEMPTS_MIN;
-#endif
-
-  // The template must end in exactly six X characters.
-  len = strlen (tmpl);
-  if (len < 6 || strcmp (&tmpl[len - 6], "XXXXXX"))
-    {
-      errno = EINVAL;
-      return -1;
-    }
-
-/* This is where the Xs start.  */
-  XXXXXX = &tmpl[len - 6];
-
-  /* Get some more or less random data.  */
-  {
-    SYSTEMTIME      stNow;
-    FILETIME ftNow;
-
-    // get system time
-    GetSystemTime(&stNow);
-    stNow.wMilliseconds = 500;
-    if (!SystemTimeToFileTime(&stNow, &ftNow))
-    {
-        // NOTE(review): -1 is not a standard errno value -- confirm intent.
-        errno = -1;
-        return -1;
-    }
-
-    random_time_bits = (((unsigned long long)ftNow.dwHighDateTime << 32)
-                        | (unsigned long long)ftNow.dwLowDateTime);
-  }
-  value += random_time_bits ^ (unsigned long long)GetCurrentThreadId ();
-
-  for (count = 0; count < attempts; value += 7777, ++count)
-  {
-    unsigned long long v = value;
-
-    /* Fill in the random bits.  */
-    // Each placeholder takes one base-62 digit of v.
-    XXXXXX[0] = letters[v % 62];
-    v /= 62;
-    XXXXXX[1] = letters[v % 62];
-    v /= 62;
-    XXXXXX[2] = letters[v % 62];
-    v /= 62;
-    XXXXXX[3] = letters[v % 62];
-    v /= 62;
-    XXXXXX[4] = letters[v % 62];
-    v /= 62;
-    XXXXXX[5] = letters[v % 62];
-
-    /* Modified for windows and to unlink */
-    //      fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
-    int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
-    flags |= _O_TEMPORARY;
-    fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
-    if (fd >= 0)
-    {
-      errno = save_errno;
-      return fd;
-    }
-    // Only a name collision is worth another attempt; any other error is final.
-    else if (errno != EEXIST)
-      return -1;
-  }
-
-  /* We got out of the loop because we ran out of combinations to try.  */
-  errno = EEXIST;
-  return -1;
-}
-#else
-// POSIX variant.  Creates a temporary file from the mkstemp template tmpl
-// (which is overwritten with the chosen name) and immediately unlinks it.
-// Returns the open descriptor, or -1 with errno set when mkstemp fails.
-// Throws ErrnoException when the file was created but could not be unlinked;
-// in that case the descriptor is closed first so it does not leak.
-int
-mkstemp_and_unlink(char *tmpl) {
-  int ret = mkstemp(tmpl);
-  if (ret != -1 && unlink(tmpl)) {
-    // close() may overwrite errno; keep unlink's value for the exception.
-    const int unlink_errno = errno;
-    close(ret);
-    errno = unlink_errno;
-    UTIL_THROW(ErrnoException, "while deleting " << tmpl);
-  }
-  return ret;
-}
-#endif
-
-// If it's a directory, add a /.  This lets users say -T /tmp without creating
-// /tmpAAAAAA
-// Empty prefixes, prefixes already ending in '/', and paths that cannot be
-// stat'ed are left unchanged.
-void NormalizeTempPrefix(std::string &base) {
-  if (base.empty() || base[base.size() - 1] == '/') return;
-  struct stat info;
-  // It's fine for it to not exist.
-  if (stat(base.c_str(), &info) == -1) return;
-#if defined(_WIN32) || defined(_WIN64)
-  const bool is_directory = (info.st_mode & _S_IFDIR) != 0;
-#else
-  const bool is_directory = S_ISDIR(info.st_mode);
-#endif
-  if (is_directory) base += '/';
-}
-
-// Create an already-unlinked temporary file whose name starts with `base`
-// and return its descriptor.  Throws ErrnoException on failure.
-int MakeTemp(const StringPiece &base) {
-  // mkstemp-style template: the prefix, six X placeholders, and an explicit
-  // terminator so the buffer can be handed over as a mutable C string.
-  std::string pattern(base.data(), base.size());
-  pattern.append("XXXXXX");
-  pattern.push_back('\0');
-  const int fd = mkstemp_and_unlink(&pattern[0]);
-  UTIL_THROW_IF(-1 == fd, ErrnoException, "while making a temporary based on " << base);
-  return fd;
-}
-
-// Like MakeTemp, but returns the temporary file as a stdio stream opened by
-// FDOpenOrThrow.  The scoped_fd closes the descriptor if FDOpenOrThrow throws.
-std::FILE *FMakeTemp(const StringPiece &base) {
-  util::scoped_fd file(MakeTemp(base));
-  return FDOpenOrThrow(file);
-}
-
-// Duplicate fd with dup() and return the new descriptor.  Throws
-// FDException if dup fails.
-int DupOrThrow(int fd) {
-  const int copy = dup(fd);
-  UTIL_THROW_IF_ARG(-1 == copy, FDException, (fd), "in duplicating the file descriptor");
-  return copy;
-}
-
-namespace {
-// Try to name things but be willing to fail too.
-// On non-Windows platforms this resolves the /proc/self/fd/<fd> symlink into
-// `out`.  Returns false on Windows, when lstat or readlink fails, or when the
-// link target does not start with '/'.  `out` may have been modified even
-// when false is returned.
-bool TryName(int fd, std::string &out) {
-#if defined(_WIN32) || defined(_WIN64)
-  return false;
-#else
-  std::string name("/proc/self/fd/");
-  std::ostringstream convert;
-  convert << fd;
-  name += convert.str();
-
-  struct stat sb;
-  if (-1 == lstat(name.c_str(), &sb))
-    return false;
-  // One byte more than reported, so a result that fills the buffer signals
-  // possible truncation.
-  out.resize(sb.st_size + 1);
-  // lstat gave us a size, but I've seen it grow, possibly due to symlinks on top of symlinks.
-  while (true) {
-    ssize_t ret = readlink(name.c_str(), &out[0], out.size());
-    if (-1 == ret)
-      return false;
-    // Strictly shorter than the buffer: the whole target fit.
-    if ((size_t)ret < out.size()) {
-      out.resize(ret);
-      break;
-    }
-    // Exponential growth.
-    out.resize(out.size() * 2);
-  }
-  // Don't use the non-file names.
-  if (!out.empty() && out[0] != '/')
-    return false;
-  return true;
-#endif
-}
-} // namespace
-
-// Best-effort human-readable name for a descriptor: the name found by
-// TryName if there is one, otherwise "stdin"/"stdout"/"stderr" for 0/1/2,
-// otherwise "fd <number>".
-std::string NameFromFD(int fd) {
-  std::string guess;
-  if (TryName(fd, guess)) return guess;
-  if (fd == 0) return "stdin";
-  if (fd == 1) return "stdout";
-  if (fd == 2) return "stderr";
-  std::ostringstream label;
-  label << "fd " << fd;
-  return label.str();
-}
-
-} // namespace util

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/file.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/file.hh b/ext/kenlm/util/file.hh
deleted file mode 100644
index f7cb4d6..0000000
--- a/ext/kenlm/util/file.hh
+++ /dev/null
@@ -1,154 +0,0 @@
-#ifndef UTIL_FILE_H
-#define UTIL_FILE_H
-
-#include "util/exception.hh"
-#include "util/scoped.hh"
-#include "util/string_piece.hh"
-
-#include <cstddef>
-#include <cstdio>
-#include <string>
-#include <stdint.h>
-
-namespace util {
-
-// Owner of a file descriptor.  The value -1 means "no descriptor held".
-// The destructor (defined in file.cc) closes the descriptor if one is held.
-// Copying is disabled.
-class scoped_fd {
-  public:
-    // Start out holding nothing.
-    scoped_fd() : fd_(-1) {}
-
-    // Take ownership of fd.
-    explicit scoped_fd(int fd) : fd_(fd) {}
-
-    ~scoped_fd();
-
-    // Close the currently held descriptor (if any) and hold `to` instead.
-    void reset(int to = -1) {
-      // The temporary adopts the old descriptor and closes it on scope exit.
-      scoped_fd previous(release());
-      fd_ = to;
-    }
-
-    // Observe the descriptor without giving up ownership.
-    int get() const { return fd_; }
-
-    int operator*() const { return fd_; }
-
-    // Give up ownership: return the descriptor and hold nothing afterwards.
-    int release() {
-      const int held = fd_;
-      fd_ = -1;
-      return held;
-    }
-
-  private:
-    int fd_;
-
-    // Declared but not defined: copying is not allowed.
-    scoped_fd(const scoped_fd &);
-    scoped_fd &operator=(const scoped_fd &);
-};
-
-// Closer policy used by scoped_FILE below; Close is defined in file.cc.
-struct scoped_FILE_closer {
-  static void Close(std::FILE *file);
-};
-// A std::FILE handle managed by the scoped<> template with the closer above.
-typedef scoped<std::FILE, scoped_FILE_closer> scoped_FILE;
-
-/* Thrown for any operation where the fd is known. */
-class FDException : public ErrnoException {
-  public:
-    // Records fd and a name guessed for it via NameFromFD.
-    explicit FDException(int fd) throw();
-
-    virtual ~FDException() throw();
-
-    // This may no longer be valid if the exception was thrown past open.
-    int FD() const { return fd_; }
-
-    // Guess from NameFromFD.
-    const std::string &NameGuess() const { return name_guess_; }
-
-  private:
-    // Descriptor the failing operation was acting on.
-    int fd_;
-
-    // Name computed once, at construction time.
-    std::string name_guess_;
-};
-
-// End of file reached.
-// Derives from Exception rather than ErrnoException/FDException.
-class EndOfFileException : public Exception {
-  public:
-    EndOfFileException() throw();
-    ~EndOfFileException() throw();
-};
-
-// Open for read only.
-int OpenReadOrThrow(const char *name);
-// Create file if it doesn't exist, truncate if it does.  Opened for reading and writing.
-int CreateOrThrow(const char *name);
-
-/** Does the given input file path denote standard input?
- *
- * Returns true if, and only if, path is either "-" or "/dev/stdin".
- *
- * Opening standard input as a file may need some special treatment for
- * portability.  There's a convention that a dash ("-") in place of an input
- * file path denotes standard input, but opening "/dev/stdin" may need to be
- * special as well.
- */
-bool InputPathIsStdin(StringPiece path);
-
-/** Does the given output file path denote standard output?
- *
- * Returns true if, and only if, path is either "-" or "/dev/stdout".
- *
- * Opening standard output as a file may need some special treatment for
- * portability.  There's a convention that a dash ("-") in place of an output
- * file path denotes standard output, but opening "/dev/stdout" may need to be
- * special as well.
- */
-bool OutputPathIsStdout(StringPiece path);
-
-// Return value for SizeFile when it can't size properly.
-const uint64_t kBadSize = (uint64_t)-1;
-uint64_t SizeFile(int fd);
-uint64_t SizeOrThrow(int fd);
-
-void ResizeOrThrow(int fd, uint64_t to);
-
-std::size_t PartialRead(int fd, void *to, std::size_t size);
-void ReadOrThrow(int fd, void *to, std::size_t size);
-std::size_t ReadOrEOF(int fd, void *to_void, std::size_t size);
-
-void WriteOrThrow(int fd, const void *data_void, std::size_t size);
-void WriteOrThrow(FILE *to, const void *data, std::size_t size);
-
-/* These call pread/pwrite in a loop.  It's safe to call ErsatzPRead and
- * ErsatzPWrite concurrently (or any combination thereof).  However, on
- * Windows they call ReadFile/WriteFile, which changes the file pointer, so
- * there it's not safe to call them concurrently with anything that uses the
- * implicit file pointer, e.g. the Read/Write functions above.
- */
-void ErsatzPRead(int fd, void *to, std::size_t size, uint64_t off);
-void ErsatzPWrite(int fd, const void *data_void, std::size_t size, uint64_t off);
-
-void FSyncOrThrow(int fd);
-
-// Seeking
-void SeekOrThrow(int fd, uint64_t off);
-void AdvanceOrThrow(int fd, int64_t off);
-void SeekEnd(int fd);
-
-std::FILE *FDOpenOrThrow(scoped_fd &file);
-std::FILE *FDOpenReadOrThrow(scoped_fd &file);
-
-// Temporary files
-// Append a / if base is a directory.
-void NormalizeTempPrefix(std::string &base);
-int MakeTemp(const StringPiece &prefix);
-std::FILE *FMakeTemp(const StringPiece &prefix);
-
-// dup an fd.
-int DupOrThrow(int fd);
-
-/* Attempt to get the file name from an fd.  This won't always work (e.g. on
- * Windows or for a pipe), and the file might have been renamed.  It's
- * intended for diagnostics and logging only.
- */
-std::string NameFromFD(int fd);
-
-} // namespace util
-
-#endif // UTIL_FILE_H

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/file_piece.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/file_piece.cc b/ext/kenlm/util/file_piece.cc
deleted file mode 100644
index 0a4d3a9..0000000
--- a/ext/kenlm/util/file_piece.cc
+++ /dev/null
@@ -1,337 +0,0 @@
-#include "util/file_piece.hh"
-
-#include "util/double-conversion/double-conversion.h"
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-
-#if defined(_WIN32) || defined(_WIN64)
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
-#include <cassert>
-#include <cerrno>
-#include <cmath>
-#include <cstdlib>
-#include <iostream>
-#include <limits>
-#include <string>
-
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-namespace util {
-
-// Starts the message with the text that failed to parse; the sentence is
-// left open after "into a " so the throw site can append the type name.
-ParseNumberException::ParseNumberException(StringPiece value) throw() {
-  *this << "Could not parse \"" << value << "\" into a ";
-}
-
-// Lookup table indexed by unsigned char value: true for ' ', '\f', '\n', '\r', '\t', and '\v' (same as isspace in the C locale).  Sigh, spelling out all 256 entries is the only way I could come up with to do a _const_ bool table.
-const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-
-// Open `name` read-only and prepare to read it.  The progress display is
-// disabled (NULL stream) when the file size cannot be determined.
-FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
-  file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()),
-  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
-  Initialize(name, show_progress, min_buffer);
-}
-
-namespace {
-// Use the caller-supplied name when there is one; otherwise guess a name
-// from the descriptor.
-std::string NamePossiblyFind(int fd, const char *name) {
-  return name ? std::string(name) : NameFromFD(fd);
-}
-} // namespace
-
-// Read from an already-open descriptor, which file_ now holds.  `name` may
-// be NULL, in which case a name is guessed from the descriptor.  The
-// progress display is disabled when the file size cannot be determined.
-FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
-  file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
-  progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + NamePossiblyFind(fd, name)) {
-  Initialize(NamePossiblyFind(fd, name).c_str(), show_progress, min_buffer);
-}
-
-// Read from a std::istream.  There is no descriptor to size or mmap, so the
-// object starts directly in read-fallback mode with a freshly allocated
-// buffer.  `name` labels the stream in error messages; "istream" is used
-// when it is NULL.
-FilePiece::FilePiece(std::istream &stream, const char *name, std::size_t min_buffer) :
-  total_size_(kBadSize), page_(SizePage()) {
-  // Honor the caller's label, as the descriptor-based constructor does,
-  // instead of silently ignoring the parameter.
-  InitializeNoRead(name ? name : "istream", min_buffer);
-
-  fallback_to_read_ = true;
-  HugeMalloc(default_map_size_, false, data_);
-  // Empty buffer: the first Shift() performs the first read.
-  position_ = data_.begin();
-  position_end_ = position_;
-
-  fell_back_.Reset(stream);
-}
-
-// Empty body: no explicit cleanup is performed here.
-FilePiece::~FilePiece() {}
-
-// Return the text up to (not including) the next `delim` and move past the
-// delimiter.  With strip_cr, a '\r' directly before the delimiter is dropped
-// from the result.  When the data ends without a delimiter, the remaining
-// text is returned; when nothing at all remains, the Shift() call throws
-// EndOfFileException.
-StringPiece FilePiece::ReadLine(char delim, bool strip_cr) {
-  // Bytes at the front of the buffer that were already scanned without
-  // finding a delimiter.
-  std::size_t skip = 0;
-  while (true) {
-    for (const char *i = position_ + skip; i < position_end_; ++i) {
-      if (*i == delim) {
-        // End of line.
-        // Take 1 byte off the end if it's an unwanted carriage return.
-        const std::size_t subtract_cr = (
-            (strip_cr && i > position_ && *(i - 1) == '\r') ?
-            1 : 0);
-        StringPiece ret(position_, i - position_ - subtract_cr);
-        position_ = i + 1;
-        return ret;
-      }
-    }
-    if (at_end_) {
-      if (position_ == position_end_) {
-        // Nothing left at all: Shift() throws EndOfFileException here.
-        Shift();
-      }
-      return Consume(position_end_);
-    }
-    // No delimiter in the buffer yet: remember how far we scanned, then get
-    // more data.
-    skip = position_end_ - position_;
-    Shift();
-  }
-}
-
-// Non-throwing flavor of ReadLine: stores the line in `to` and returns true,
-// or returns false (leaving `to` untouched) once the end of file is hit.
-bool FilePiece::ReadLineOrEOF(StringPiece &to, char delim, bool strip_cr) {
-  try {
-    to = ReadLine(delim, strip_cr);
-    return true;
-  } catch (const util::EndOfFileException &) {
-    return false;
-  }
-}
-
-// Typed entry points; each forwards to the ReadNumber template for its type.
-float FilePiece::ReadFloat() {
-  return ReadNumber<float>();
-}
-double FilePiece::ReadDouble() {
-  return ReadNumber<double>();
-}
-long int FilePiece::ReadLong() {
-  return ReadNumber<long int>();
-}
-unsigned long int FilePiece::ReadULong() {
-  return ReadNumber<unsigned long int>();
-}
-
-// Factored out so that istream can call this.
-// Records the name and resets the buffer bookkeeping without touching the
-// input.  The buffer size is a whole number of pages: at least two, and at
-// least min_buffer / page_ + 1.
-void FilePiece::InitializeNoRead(const char *name, std::size_t min_buffer) {
-  file_name_ = name;
-
-  const std::size_t pages = std::max<std::size_t>((min_buffer / page_ + 1), 2);
-  default_map_size_ = page_ * pages;
-
-  mapped_offset_ = 0;
-  at_end_ = false;
-  position_ = NULL;
-  position_end_ = NULL;
-}
-
-// Set up reading after construction: choose between mmap and read()
-// depending on whether the file could be sized, load the first chunk, and
-// switch to the read() path if that chunk starts with a compressed-file magic
-// number.
-void FilePiece::Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer) {
-  InitializeNoRead(name, min_buffer);
-
-  // Cleared in both cases; TransitionToRead() asserts that it is clear.
-  fallback_to_read_ = false;
-  if (total_size_ == kBadSize) {
-    if (show_progress)
-      *show_progress << "File " << name << " isn't normal.  Using slower read() instead of mmap().  No progress bar." << std::endl;
-    TransitionToRead();
-  }
-  Shift();
-  // gzip detect.
-  const bool enough_for_magic = (position_end_ >= position_ + ReadCompressed::kMagicSize);
-  if (enough_for_magic && ReadCompressed::DetectCompressedMagic(position_) && !fallback_to_read_) {
-    at_end_ = false;
-    TransitionToRead();
-  }
-}
-
-namespace {
-
-// Shared string-to-number converter.  Trailing junk and leading spaces are
-// allowed; both fallback values (the third and fourth arguments) are quiet
-// NaN; "inf" and "NaN" are the infinity and NaN symbols.
-static const double_conversion::StringToDoubleConverter kConverter(
-    double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK | double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES,
-    std::numeric_limits<double>::quiet_NaN(),
-    std::numeric_limits<double>::quiet_NaN(),
-    "inf",
-    "NaN");
-
-// Return the prefix of str that ends just before its first whitespace
-// character (as defined by kSpaces), or all of str if it contains none.
-StringPiece FirstToken(StringPiece str) {
-  const char *const begin = str.data();
-  const char *const end = begin + str.size();
-  const char *cursor = begin;
-  while (cursor != end && !kSpaces[(unsigned char)*cursor]) ++cursor;
-  return StringPiece(begin, cursor - begin);
-}
-
-// Parse a float from the front of str and return a pointer just past the
-// characters consumed.  Throws ParseNumberException when the text is not a
-// number.
-const char *ParseNumber(StringPiece str, float &out) {
-  int count;
-  out = kConverter.StringToFloat(str.data(), str.size(), &count);
-  if (std::isnan(out)) {
-    // The converter also yields NaN for unparseable input, so NaN is only
-    // accepted when the text really says so.  str usually extends past the
-    // number being parsed, so the literal "NaN" is checked against the first
-    // token: comparing all of str would reject a legitimate NaN followed by
-    // more text.
-    const StringPiece token(FirstToken(str));
-    UTIL_THROW_IF_ARG(token != "NaN" && str != "nan", ParseNumberException, (token), "float");
-  }
-  return str.data() + count;
-}
-// Parse a double from the front of str and return a pointer just past the
-// characters consumed.  Throws ParseNumberException when the text is not a
-// number.
-const char *ParseNumber(StringPiece str, double &out) {
-  int count;
-  out = kConverter.StringToDouble(str.data(), str.size(), &count);
-  if (std::isnan(out)) {
-    // The converter also yields NaN for unparseable input, so NaN is only
-    // accepted when the text really says so.  str usually extends past the
-    // number being parsed, so the literal "NaN" is checked against the first
-    // token: comparing all of str would reject a legitimate NaN followed by
-    // more text.
-    const StringPiece token(FirstToken(str));
-    UTIL_THROW_IF_ARG(token != "NaN" && str != "nan", ParseNumberException, (token), "double");
-  }
-  return str.data() + count;
-}
-// Parse a base-10 long from the front of str and return a pointer just past
-// the characters strtol consumed.  Throws ParseNumberException when strtol
-// sets errno or consumes nothing.
-// NOTE(review): strtol is not given str.size(), so it stops only at the
-// first character that cannot be part of the number -- confirm callers
-// always guarantee one.
-const char *ParseNumber(StringPiece str, long int &out) {
-  char *end;
-  // Cleared first so a stale errno is not mistaken for a parse failure.
-  errno = 0;
-  out = strtol(str.data(), &end, 10);
-  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "long int");
-  return end;
-}
-// Parse a base-10 unsigned long from the front of str and return a pointer
-// just past the characters strtoul consumed.  Throws ParseNumberException
-// when strtoul sets errno or consumes nothing.
-// NOTE(review): strtoul is not given str.size(), so it stops only at the
-// first character that cannot be part of the number -- confirm callers
-// always guarantee one.
-const char *ParseNumber(StringPiece str, unsigned long int &out) {
-  char *end;
-  // Cleared first so a stale errno is not mistaken for a parse failure.
-  errno = 0;
-  out = strtoul(str.data(), &end, 10);
-  UTIL_THROW_IF_ARG(errno || (end == str.data()), ParseNumberException, (FirstToken(str)), "unsigned long int");
-  return end;
-}
-} // namespace
-
-// Skip leading whitespace, parse one number of type T at the current
-// position, and advance past the characters consumed.  Parse failures
-// surface as ParseNumberException from ParseNumber.
-template <class T> T FilePiece::ReadNumber() {
-  SkipSpaces();
-  // Keep pulling in data until the buffer holds whitespace at or after the
-  // current position, so the number is delimited inside the buffer.
-  while (last_space_ < position_) {
-    if (UTIL_UNLIKELY(at_end_)) {
-      // Hallucinate a null off the end of the file.
-      std::string buffer(position_, position_end_);
-      T ret;
-      // Has to be null-terminated.
-      const char *begin = buffer.c_str();
-      const char *end = ParseNumber(StringPiece(begin, buffer.size()), ret);
-      // Translate the consumed length from the copy back to the real buffer.
-      position_ += end - begin;
-      return ret;
-    }
-    Shift();
-  }
-  T ret;
-  position_ = ParseNumber(StringPiece(position_, last_space_ - position_), ret);
-  return ret;
-}
-
-// Return a pointer to the first byte at or after position_ for which the
-// `delim` table (indexed by unsigned char value) is true, pulling in more
-// data as needed.  Returns position_end_ when the data ends first; when
-// nothing at all remains, the Shift() call throws EndOfFileException.
-const char *FilePiece::FindDelimiterOrEOF(const bool *delim)  {
-  // Bytes at the front of the buffer that were already scanned.
-  std::size_t skip = 0;
-  while (true) {
-    for (const char *i = position_ + skip; i < position_end_; ++i) {
-      if (delim[static_cast<unsigned char>(*i)]) return i;
-    }
-    if (at_end_) {
-      if (position_ == position_end_) Shift();
-      return position_end_;
-    }
-    skip = position_end_ - position_;
-    Shift();
-  }
-}
-
-// Bring more data into the buffer, via mmap or via read() depending on
-// fallback_to_read_, then recompute last_space_.  Throws EndOfFileException
-// (after marking progress finished) when already at the end.
-void FilePiece::Shift() {
-  if (at_end_) {
-    progress_.Finished();
-    throw EndOfFileException();
-  }
-  // Absolute file offset of the current position.
-  uint64_t desired_begin = position_ - data_.begin() + mapped_offset_;
-
-  if (!fallback_to_read_) MMapShift(desired_begin);
-  // Notice an mmap failure might set the fallback.
-  if (fallback_to_read_) ReadShift();
-
-  // Scan backwards for the last whitespace byte in the buffer; if there is
-  // none, last_space_ ends up just before position_.
-  for (last_space_ = position_end_ - 1; last_space_ >= position_; --last_space_) {
-    if (kSpaces[static_cast<unsigned char>(*last_space_)])  break;
-  }
-}
-
-// Map a window of the file that contains file offset desired_begin.  If the
-// mapping fails with ErrnoException, seek the descriptor to desired_begin
-// (when non-zero) and switch to the read() path instead.
-void FilePiece::MMapShift(uint64_t desired_begin) {
-  // Use mmap.
-  // The mapping starts on a page boundary; `ignore` is the distance from that
-  // boundary to desired_begin.
-  uint64_t ignore = desired_begin % page_;
-  // Duplicate request for Shift means give more data.
-  if (position_ == data_.begin() + ignore && position_) {
-    default_map_size_ *= 2;
-  }
-  // Local version so that in case of failure it doesn't overwrite the class variable.
-  uint64_t mapped_offset = desired_begin - ignore;
-
-  uint64_t mapped_size;
-  // If the window reaches the end of the file, map only what remains and
-  // mark the end.
-  if (default_map_size_ >= static_cast<std::size_t>(total_size_ - mapped_offset)) {
-    at_end_ = true;
-    mapped_size = total_size_ - mapped_offset;
-  } else {
-    mapped_size = default_map_size_;
-  }
-
-  // Forcibly clear the existing mmap first.
-  data_.reset();
-  try {
-    MapRead(POPULATE_OR_LAZY, *file_, mapped_offset, mapped_size, data_);
-  } catch (const util::ErrnoException &e) {
-    if (desired_begin) {
-      SeekOrThrow(*file_, desired_begin);
-    }
-    // The mmap was scheduled to end the file, but now we're going to read it.
-    at_end_ = false;
-    TransitionToRead();
-    return;
-  }
-  mapped_offset_ = mapped_offset;
-  position_ = data_.begin() + ignore;
-  position_end_ = data_.begin() + mapped_size;
-
-  progress_.Set(desired_begin);
-}
-
-// Switch from mmap to the read() path: replace the mapping with an empty
-// allocated buffer and hand the descriptor over to fell_back_.  Must only be
-// called while still in mmap mode (asserted).
-void FilePiece::TransitionToRead() {
-  assert(!fallback_to_read_);
-  fallback_to_read_ = true;
-  data_.reset();
-  HugeMalloc(default_map_size_, false, data_);
-  // Empty buffer: the next ReadShift() performs the first read.
-  position_ = data_.begin();
-  position_end_ = position_;
-
-  try {
-    fell_back_.Reset(file_.release());
-  } catch (util::Exception &e) {
-    // Add the file name to the message before rethrowing.
-    e << " in file " << file_name_;
-    throw;
-  }
-}
-
-// read()-path counterpart of MMapShift: make room in the buffer if it is
-// full, then append one more read from fell_back_.  A read that returns 0
-// bytes sets at_end_.
-void FilePiece::ReadShift() {
-  assert(fallback_to_read_);
-  // Bytes [data_.begin(), position_) have been consumed.
-  // Bytes [position_, position_end_) have been read into the buffer.
-
-  // Start at the beginning of the buffer if there's nothing useful in it.
-  if (position_ == position_end_) {
-    mapped_offset_ += (position_end_ - data_.begin());
-    position_ = data_.begin();
-    position_end_ = position_;
-  }
-
-  std::size_t already_read = position_end_ - data_.begin();
-
-  // The buffer is full up to its end.
-  if (already_read == default_map_size_) {
-    if (position_ == data_.begin()) {
-      // Buffer too small.
-      std::size_t valid_length = position_end_ - position_;
-      default_map_size_ *= 2;
-      HugeRealloc(default_map_size_, false, data_);
-      // Re-derive the pointers from data_ after the reallocation.
-      position_ = data_.begin();
-      position_end_ = position_ + valid_length;
-    } else {
-      // Slide the unconsumed bytes to the front to free space at the end.
-      std::size_t moving = position_end_ - position_;
-      memmove(data_.get(), position_, moving);
-      position_ = data_.begin();
-      position_end_ = position_ + moving;
-      already_read = moving;
-    }
-  }
-
-  std::size_t read_return = fell_back_.Read(static_cast<uint8_t*>(data_.get()) + already_read, default_map_size_ - already_read);
-  progress_.Set(fell_back_.RawAmount());
-
-  if (read_return == 0) {
-    at_end_ = true;
-  }
-  position_end_ += read_return;
-}
-
-} // namespace util