You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/19 21:33:52 UTC
[04/51] [partial] incubator-joshua git commit: Converted KenLM into a
submodule
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/bignum.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/bignum.h b/ext/kenlm/util/double-conversion/bignum.h
deleted file mode 100644
index 5ec3544..0000000
--- a/ext/kenlm/util/double-conversion/bignum.h
+++ /dev/null
@@ -1,145 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_BIGNUM_H_
-#define DOUBLE_CONVERSION_BIGNUM_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-class Bignum {
- public:
- // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately.
- // This bignum can encode much bigger numbers, since it contains an
- // exponent.
- static const int kMaxSignificantBits = 3584;
-
- Bignum();
- void AssignUInt16(uint16_t value);
- void AssignUInt64(uint64_t value);
- void AssignBignum(const Bignum& other);
-
- void AssignDecimalString(Vector<const char> value);
- void AssignHexString(Vector<const char> value);
-
- void AssignPowerUInt16(uint16_t base, int exponent);
-
- void AddUInt16(uint16_t operand);
- void AddUInt64(uint64_t operand);
- void AddBignum(const Bignum& other);
- // Precondition: this >= other.
- void SubtractBignum(const Bignum& other);
-
- void Square();
- void ShiftLeft(int shift_amount);
- void MultiplyByUInt32(uint32_t factor);
- void MultiplyByUInt64(uint64_t factor);
- void MultiplyByPowerOfTen(int exponent);
- void Times10() { return MultiplyByUInt32(10); }
- // Pseudocode:
- // int result = this / other;
- // this = this % other;
- // In the worst case this function is in O(this/other).
- uint16_t DivideModuloIntBignum(const Bignum& other);
-
- bool ToHexString(char* buffer, int buffer_size) const;
-
- // Returns
- // -1 if a < b,
- // 0 if a == b, and
- // +1 if a > b.
- static int Compare(const Bignum& a, const Bignum& b);
- static bool Equal(const Bignum& a, const Bignum& b) {
- return Compare(a, b) == 0;
- }
- static bool LessEqual(const Bignum& a, const Bignum& b) {
- return Compare(a, b) <= 0;
- }
- static bool Less(const Bignum& a, const Bignum& b) {
- return Compare(a, b) < 0;
- }
- // Returns Compare(a + b, c);
- static int PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c);
- // Returns a + b == c
- static bool PlusEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) == 0;
- }
- // Returns a + b <= c
- static bool PlusLessEqual(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) <= 0;
- }
- // Returns a + b < c
- static bool PlusLess(const Bignum& a, const Bignum& b, const Bignum& c) {
- return PlusCompare(a, b, c) < 0;
- }
- private:
- typedef uint32_t Chunk;
- typedef uint64_t DoubleChunk;
-
- static const int kChunkSize = sizeof(Chunk) * 8;
- static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8;
- // With bigit size of 28 we loose some bits, but a double still fits easily
- // into two chunks, and more importantly we can use the Comba multiplication.
- static const int kBigitSize = 28;
- static const Chunk kBigitMask = (1 << kBigitSize) - 1;
- // Every instance allocates kBigitLength chunks on the stack. Bignums cannot
- // grow. There are no checks if the stack-allocated space is sufficient.
- static const int kBigitCapacity = kMaxSignificantBits / kBigitSize;
-
- void EnsureCapacity(int size) {
- if (size > kBigitCapacity) {
- UNREACHABLE();
- }
- }
- void Align(const Bignum& other);
- void Clamp();
- bool IsClamped() const;
- void Zero();
- // Requires this to have enough capacity (no tests done).
- // Updates used_digits_ if necessary.
- // shift_amount must be < kBigitSize.
- void BigitsShiftLeft(int shift_amount);
- // BigitLength includes the "hidden" digits encoded in the exponent.
- int BigitLength() const { return used_digits_ + exponent_; }
- Chunk BigitAt(int index) const;
- void SubtractTimes(const Bignum& other, int factor);
-
- Chunk bigits_buffer_[kBigitCapacity];
- // A vector backed by bigits_buffer_. This way accesses to the array are
- // checked for out-of-bounds errors.
- Vector<Chunk> bigits_;
- int used_digits_;
- // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize).
- int exponent_;
-
- DISALLOW_COPY_AND_ASSIGN(Bignum);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_BIGNUM_H_
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/cached-powers.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/cached-powers.cc b/ext/kenlm/util/double-conversion/cached-powers.cc
deleted file mode 100644
index 9dcfa36..0000000
--- a/ext/kenlm/util/double-conversion/cached-powers.cc
+++ /dev/null
@@ -1,175 +0,0 @@
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <cstdarg>
-#include <climits>
-#include <cmath>
-
-#include "utils.h"
-
-#include "cached-powers.h"
-
-namespace double_conversion {
-
-struct CachedPower {
- uint64_t significand;
- int16_t binary_exponent;
- int16_t decimal_exponent;
-};
-
-static const CachedPower kCachedPowers[] = {
- {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348},
- {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340},
- {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332},
- {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324},
- {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316},
- {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308},
- {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300},
- {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292},
- {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284},
- {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276},
- {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268},
- {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260},
- {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252},
- {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244},
- {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236},
- {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228},
- {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220},
- {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212},
- {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204},
- {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196},
- {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188},
- {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180},
- {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172},
- {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164},
- {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156},
- {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148},
- {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140},
- {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132},
- {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124},
- {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116},
- {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108},
- {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100},
- {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92},
- {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84},
- {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76},
- {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68},
- {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60},
- {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52},
- {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44},
- {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36},
- {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28},
- {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20},
- {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12},
- {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4},
- {UINT64_2PART_C(0x9c400000, 00000000), -50, 4},
- {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12},
- {UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20},
- {UINT64_2PART_C(0x813f3978, f8940984), 30, 28},
- {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36},
- {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44},
- {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52},
- {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60},
- {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68},
- {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76},
- {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84},
- {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92},
- {UINT64_2PART_C(0x924d692c, a61be758), 269, 100},
- {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108},
- {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116},
- {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124},
- {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132},
- {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140},
- {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148},
- {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156},
- {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164},
- {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172},
- {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180},
- {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188},
- {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196},
- {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204},
- {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212},
- {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220},
- {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228},
- {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236},
- {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244},
- {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252},
- {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260},
- {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268},
- {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276},
- {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284},
- {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292},
- {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300},
- {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308},
- {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316},
- {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324},
- {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332},
- {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340},
-};
-
-static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers);
-static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent.
-static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10)
-// Difference between the decimal exponents in the table above.
-const int PowersOfTenCache::kDecimalExponentDistance = 8;
-const int PowersOfTenCache::kMinDecimalExponent = -348;
-const int PowersOfTenCache::kMaxDecimalExponent = 340;
-
-void PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
- int min_exponent,
- int max_exponent,
- DiyFp* power,
- int* decimal_exponent) {
- int kQ = DiyFp::kSignificandSize;
- double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10);
- int foo = kCachedPowersOffset;
- int index =
- (foo + static_cast<int>(k) - 1) / kDecimalExponentDistance + 1;
- ASSERT(0 <= index && index < kCachedPowersLength);
- CachedPower cached_power = kCachedPowers[index];
- ASSERT(min_exponent <= cached_power.binary_exponent);
- ASSERT(cached_power.binary_exponent <= max_exponent);
- *decimal_exponent = cached_power.decimal_exponent;
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
-}
-
-
-void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent,
- DiyFp* power,
- int* found_exponent) {
- ASSERT(kMinDecimalExponent <= requested_exponent);
- ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance);
- int index =
- (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance;
- CachedPower cached_power = kCachedPowers[index];
- *power = DiyFp(cached_power.significand, cached_power.binary_exponent);
- *found_exponent = cached_power.decimal_exponent;
- ASSERT(*found_exponent <= requested_exponent);
- ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance);
-}
-
-} // namespace double_conversion
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/cached-powers.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/cached-powers.h b/ext/kenlm/util/double-conversion/cached-powers.h
deleted file mode 100644
index 61a5061..0000000
--- a/ext/kenlm/util/double-conversion/cached-powers.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_
-#define DOUBLE_CONVERSION_CACHED_POWERS_H_
-
-#include "diy-fp.h"
-
-namespace double_conversion {
-
-class PowersOfTenCache {
- public:
-
- // Not all powers of ten are cached. The decimal exponent of two neighboring
- // cached numbers will differ by kDecimalExponentDistance.
- static const int kDecimalExponentDistance;
-
- static const int kMinDecimalExponent;
- static const int kMaxDecimalExponent;
-
- // Returns a cached power-of-ten with a binary exponent in the range
- // [min_exponent; max_exponent] (boundaries included).
- static void GetCachedPowerForBinaryExponentRange(int min_exponent,
- int max_exponent,
- DiyFp* power,
- int* decimal_exponent);
-
- // Returns a cached power of ten x ~= 10^k such that
- // k <= decimal_exponent < k + kCachedPowersDecimalDistance.
- // The given decimal_exponent must satisfy
- // kMinDecimalExponent <= requested_exponent, and
- // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance.
- static void GetCachedPowerForDecimalExponent(int requested_exponent,
- DiyFp* power,
- int* found_exponent);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/diy-fp.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/diy-fp.cc b/ext/kenlm/util/double-conversion/diy-fp.cc
deleted file mode 100644
index ddd1891..0000000
--- a/ext/kenlm/util/double-conversion/diy-fp.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-#include "diy-fp.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-void DiyFp::Multiply(const DiyFp& other) {
- // Simply "emulates" a 128 bit multiplication.
- // However: the resulting number only contains 64 bits. The least
- // significant 64 bits are only used for rounding the most significant 64
- // bits.
- const uint64_t kM32 = 0xFFFFFFFFU;
- uint64_t a = f_ >> 32;
- uint64_t b = f_ & kM32;
- uint64_t c = other.f_ >> 32;
- uint64_t d = other.f_ & kM32;
- uint64_t ac = a * c;
- uint64_t bc = b * c;
- uint64_t ad = a * d;
- uint64_t bd = b * d;
- uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
- // By adding 1U << 31 to tmp we round the final result.
- // Halfway cases will be round up.
- tmp += 1U << 31;
- uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
- e_ += other.e_ + 64;
- f_ = result_f;
-}
-
-} // namespace double_conversion
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/diy-fp.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/diy-fp.h b/ext/kenlm/util/double-conversion/diy-fp.h
deleted file mode 100644
index 9dcf8fb..0000000
--- a/ext/kenlm/util/double-conversion/diy-fp.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_DIY_FP_H_
-#define DOUBLE_CONVERSION_DIY_FP_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-// This "Do It Yourself Floating Point" class implements a floating-point number
-// with a uint64 significand and an int exponent. Normalized DiyFp numbers will
-// have the most significant bit of the significand set.
-// Multiplication and Subtraction do not normalize their results.
-// DiyFp are not designed to contain special doubles (NaN and Infinity).
-class DiyFp {
- public:
- static const int kSignificandSize = 64;
-
- DiyFp() : f_(0), e_(0) {}
- DiyFp(uint64_t f, int e) : f_(f), e_(e) {}
-
- // this = this - other.
- // The exponents of both numbers must be the same and the significand of this
- // must be bigger than the significand of other.
- // The result will not be normalized.
- void Subtract(const DiyFp& other) {
- ASSERT(e_ == other.e_);
- ASSERT(f_ >= other.f_);
- f_ -= other.f_;
- }
-
- // Returns a - b.
- // The exponents of both numbers must be the same and this must be bigger
- // than other. The result will not be normalized.
- static DiyFp Minus(const DiyFp& a, const DiyFp& b) {
- DiyFp result = a;
- result.Subtract(b);
- return result;
- }
-
-
- // this = this * other.
- void Multiply(const DiyFp& other);
-
- // returns a * b;
- static DiyFp Times(const DiyFp& a, const DiyFp& b) {
- DiyFp result = a;
- result.Multiply(b);
- return result;
- }
-
- void Normalize() {
- ASSERT(f_ != 0);
- uint64_t f = f_;
- int e = e_;
-
- // This method is mainly called for normalizing boundaries. In general
- // boundaries need to be shifted by 10 bits. We thus optimize for this case.
- const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000);
- while ((f & k10MSBits) == 0) {
- f <<= 10;
- e -= 10;
- }
- while ((f & kUint64MSB) == 0) {
- f <<= 1;
- e--;
- }
- f_ = f;
- e_ = e;
- }
-
- static DiyFp Normalize(const DiyFp& a) {
- DiyFp result = a;
- result.Normalize();
- return result;
- }
-
- uint64_t f() const { return f_; }
- int e() const { return e_; }
-
- void set_f(uint64_t new_value) { f_ = new_value; }
- void set_e(int new_value) { e_ = new_value; }
-
- private:
- static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000);
-
- uint64_t f_;
- int e_;
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_DIY_FP_H_
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/double-conversion.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/double-conversion.cc b/ext/kenlm/util/double-conversion/double-conversion.cc
deleted file mode 100644
index 8a7923c..0000000
--- a/ext/kenlm/util/double-conversion/double-conversion.cc
+++ /dev/null
@@ -1,889 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <climits>
-#include <cmath>
-
-#include "double-conversion.h"
-
-#include "bignum-dtoa.h"
-#include "fast-dtoa.h"
-#include "fixed-dtoa.h"
-#include "ieee.h"
-#include "strtod.h"
-#include "utils.h"
-
-namespace double_conversion {
-
-const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() {
- int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN;
- static DoubleToStringConverter converter(flags,
- "Infinity",
- "NaN",
- 'e',
- -6, 21,
- 6, 0);
- return converter;
-}
-
-
-bool DoubleToStringConverter::HandleSpecialValues(
- double value,
- StringBuilder* result_builder) const {
- Double double_inspect(value);
- if (double_inspect.IsInfinite()) {
- if (infinity_symbol_ == NULL) return false;
- if (value < 0) {
- result_builder->AddCharacter('-');
- }
- result_builder->AddString(infinity_symbol_);
- return true;
- }
- if (double_inspect.IsNan()) {
- if (nan_symbol_ == NULL) return false;
- result_builder->AddString(nan_symbol_);
- return true;
- }
- return false;
-}
-
-
-void DoubleToStringConverter::CreateExponentialRepresentation(
- const char* decimal_digits,
- int length,
- int exponent,
- StringBuilder* result_builder) const {
- ASSERT(length != 0);
- result_builder->AddCharacter(decimal_digits[0]);
- if (length != 1) {
- result_builder->AddCharacter('.');
- result_builder->AddSubstring(&decimal_digits[1], length-1);
- }
- result_builder->AddCharacter(exponent_character_);
- if (exponent < 0) {
- result_builder->AddCharacter('-');
- exponent = -exponent;
- } else {
- if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) {
- result_builder->AddCharacter('+');
- }
- }
- if (exponent == 0) {
- result_builder->AddCharacter('0');
- return;
- }
- ASSERT(exponent < 1e4);
- const int kMaxExponentLength = 5;
- char buffer[kMaxExponentLength + 1];
- buffer[kMaxExponentLength] = '\0';
- int first_char_pos = kMaxExponentLength;
- while (exponent > 0) {
- buffer[--first_char_pos] = '0' + (exponent % 10);
- exponent /= 10;
- }
- result_builder->AddSubstring(&buffer[first_char_pos],
- kMaxExponentLength - first_char_pos);
-}
-
-
-void DoubleToStringConverter::CreateDecimalRepresentation(
- const char* decimal_digits,
- int length,
- int decimal_point,
- int digits_after_point,
- StringBuilder* result_builder) const {
- // Create a representation that is padded with zeros if needed.
- if (decimal_point <= 0) {
- // "0.00000decimal_rep".
- result_builder->AddCharacter('0');
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', -decimal_point);
- ASSERT(length <= digits_after_point - (-decimal_point));
- result_builder->AddSubstring(decimal_digits, length);
- int remaining_digits = digits_after_point - (-decimal_point) - length;
- result_builder->AddPadding('0', remaining_digits);
- }
- } else if (decimal_point >= length) {
- // "decimal_rep0000.00000" or "decimal_rep.0000"
- result_builder->AddSubstring(decimal_digits, length);
- result_builder->AddPadding('0', decimal_point - length);
- if (digits_after_point > 0) {
- result_builder->AddCharacter('.');
- result_builder->AddPadding('0', digits_after_point);
- }
- } else {
- // "decima.l_rep000"
- ASSERT(digits_after_point > 0);
- result_builder->AddSubstring(decimal_digits, decimal_point);
- result_builder->AddCharacter('.');
- ASSERT(length - decimal_point <= digits_after_point);
- result_builder->AddSubstring(&decimal_digits[decimal_point],
- length - decimal_point);
- int remaining_digits = digits_after_point - (length - decimal_point);
- result_builder->AddPadding('0', remaining_digits);
- }
- if (digits_after_point == 0) {
- if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) {
- result_builder->AddCharacter('.');
- }
- if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) {
- result_builder->AddCharacter('0');
- }
- }
-}
-
-
-bool DoubleToStringConverter::ToShortestIeeeNumber(
- double value,
- StringBuilder* result_builder,
- DoubleToStringConverter::DtoaMode mode) const {
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE);
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- int decimal_point;
- bool sign;
- const int kDecimalRepCapacity = kBase10MaximalLength + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = (flags_ & UNIQUE_ZERO) != 0;
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- if ((decimal_in_shortest_low_ <= exponent) &&
- (exponent < decimal_in_shortest_high_)) {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length,
- decimal_point,
- Max(0, decimal_rep_length - decimal_point),
- result_builder);
- } else {
- CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent,
- result_builder);
- }
- return true;
-}
-
-
-bool DoubleToStringConverter::ToFixed(double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- ASSERT(kMaxFixedDigitsBeforePoint == 60);
- const double kFirstNonFixed = 1e60;
-
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits > kMaxFixedDigitsAfterPoint) return false;
- if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false;
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add space for the '\0' byte.
- const int kDecimalRepCapacity =
- kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
- DoubleToAscii(value, FIXED, requested_digits,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- requested_digits, result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToExponential(
- double value,
- int requested_digits,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (requested_digits < -1) return false;
- if (requested_digits > kMaxExponentialDigits) return false;
-
- int decimal_point;
- bool sign;
- // Add space for digit before the decimal point and the '\0' character.
- const int kDecimalRepCapacity = kMaxExponentialDigits + 2;
- ASSERT(kDecimalRepCapacity > kBase10MaximalLength);
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- if (requested_digits == -1) {
- DoubleToAscii(value, SHORTEST, 0,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- } else {
- DoubleToAscii(value, PRECISION, requested_digits + 1,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= requested_digits + 1);
-
- for (int i = decimal_rep_length; i < requested_digits + 1; ++i) {
- decimal_rep[i] = '0';
- }
- decimal_rep_length = requested_digits + 1;
- }
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- int exponent = decimal_point - 1;
- CreateExponentialRepresentation(decimal_rep,
- decimal_rep_length,
- exponent,
- result_builder);
- return true;
-}
-
-
-bool DoubleToStringConverter::ToPrecision(double value,
- int precision,
- StringBuilder* result_builder) const {
- if (Double(value).IsSpecial()) {
- return HandleSpecialValues(value, result_builder);
- }
-
- if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) {
- return false;
- }
-
- // Find a sufficiently precise decimal representation of n.
- int decimal_point;
- bool sign;
- // Add one for the terminating null character.
- const int kDecimalRepCapacity = kMaxPrecisionDigits + 1;
- char decimal_rep[kDecimalRepCapacity];
- int decimal_rep_length;
-
- DoubleToAscii(value, PRECISION, precision,
- decimal_rep, kDecimalRepCapacity,
- &sign, &decimal_rep_length, &decimal_point);
- ASSERT(decimal_rep_length <= precision);
-
- bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0);
- if (sign && (value != 0.0 || !unique_zero)) {
- result_builder->AddCharacter('-');
- }
-
- // The exponent if we print the number as x.xxeyyy. That is with the
- // decimal point after the first digit.
- int exponent = decimal_point - 1;
-
- int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0;
- if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) ||
- (decimal_point - precision + extra_zero >
- max_trailing_padding_zeroes_in_precision_mode_)) {
- // Fill buffer to contain 'precision' digits.
- // Usually the buffer is already at the correct length, but 'DoubleToAscii'
- // is allowed to return less characters.
- for (int i = decimal_rep_length; i < precision; ++i) {
- decimal_rep[i] = '0';
- }
-
- CreateExponentialRepresentation(decimal_rep,
- precision,
- exponent,
- result_builder);
- } else {
- CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point,
- Max(0, precision - decimal_point),
- result_builder);
- }
- return true;
-}
-
-
-static BignumDtoaMode DtoaToBignumDtoaMode(
- DoubleToStringConverter::DtoaMode dtoa_mode) {
- switch (dtoa_mode) {
- case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST;
- case DoubleToStringConverter::SHORTEST_SINGLE:
- return BIGNUM_DTOA_SHORTEST_SINGLE;
- case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED;
- case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION;
- default:
- UNREACHABLE();
- return BIGNUM_DTOA_SHORTEST; // To silence compiler.
- }
-}
-
-
-void DoubleToStringConverter::DoubleToAscii(double v,
- DtoaMode mode,
- int requested_digits,
- char* buffer,
- int buffer_length,
- bool* sign,
- int* length,
- int* point) {
- Vector<char> vector(buffer, buffer_length);
- ASSERT(!Double(v).IsSpecial());
- ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0);
-
- if (Double(v).Sign() < 0) {
- *sign = true;
- v = -v;
- } else {
- *sign = false;
- }
-
- if (mode == PRECISION && requested_digits == 0) {
- vector[0] = '\0';
- *length = 0;
- return;
- }
-
- if (v == 0) {
- vector[0] = '0';
- vector[1] = '\0';
- *length = 1;
- *point = 1;
- return;
- }
-
- bool fast_worked;
- switch (mode) {
- case SHORTEST:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point);
- break;
- case SHORTEST_SINGLE:
- fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0,
- vector, length, point);
- break;
- case FIXED:
- fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point);
- break;
- case PRECISION:
- fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits,
- vector, length, point);
- break;
- default:
- UNREACHABLE();
- fast_worked = false;
- }
- if (fast_worked) return;
-
- // If the fast dtoa didn't succeed use the slower bignum version.
- BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode);
- BignumDtoa(v, bignum_mode, requested_digits, vector, length, point);
- vector[*length] = '\0';
-}
-
-
-// Consumes the given substring from the iterator.
-// Returns false, if the substring does not match.
-static bool ConsumeSubString(const char** current,
- const char* end,
- const char* substring) {
- ASSERT(**current == *substring);
- for (substring++; *substring != '\0'; substring++) {
- ++*current;
- if (*current == end || **current != *substring) return false;
- }
- ++*current;
- return true;
-}
-
-
-// Maximum number of significant digits in decimal representation.
-// The longest possible double in decimal representation is
-// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
-// (768 digits). If we parse a number whose first digits are equal to a
-// mean of 2 adjacent doubles (that could have up to 769 digits) the result
-// must be rounded to the bigger one unless the tail consists of zeros, so
-// we don't need to preserve all the digits.
-const int kMaxSignificantDigits = 772;
-
-
-// Returns true if a nonspace found and false if the end has reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) {
- while (*current != end) {
- if (**current != ' ') return true;
- ++*current;
- }
- return false;
-}
-
-
-static bool isDigit(int x, int radix) {
- return (x >= '0' && x <= '9' && x < '0' + radix)
- || (radix > 10 && x >= 'a' && x < 'a' + radix - 10)
- || (radix > 10 && x >= 'A' && x < 'A' + radix - 10);
-}
-
-
-static double SignedZero(bool sign) {
- return sign ? -0.0 : 0.0;
-}
-
-
-// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
-template <int radix_log_2>
-static double RadixStringToIeee(const char* current,
- const char* end,
- bool sign,
- bool allow_trailing_junk,
- double junk_string_value,
- bool read_as_double,
- const char** trailing_pointer) {
- ASSERT(current != end);
-
- const int kDoubleSize = Double::kSignificandSize;
- const int kSingleSize = Single::kSignificandSize;
- const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize;
-
- // Skip leading 0s.
- while (*current == '0') {
- ++current;
- if (current == end) {
- *trailing_pointer = end;
- return SignedZero(sign);
- }
- }
-
- int64_t number = 0;
- int exponent = 0;
- const int radix = (1 << radix_log_2);
-
- do {
- int digit;
- if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
- digit = static_cast<char>(*current) - '0';
- } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
- digit = static_cast<char>(*current) - 'a' + 10;
- } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
- digit = static_cast<char>(*current) - 'A' + 10;
- } else {
- if (allow_trailing_junk || !AdvanceToNonspace(¤t, end)) {
- break;
- } else {
- return junk_string_value;
- }
- }
-
- number = number * radix + digit;
- int overflow = static_cast<int>(number >> kSignificandSize);
- if (overflow != 0) {
- // Overflow occurred. Need to determine which direction to round the
- // result.
- int overflow_bits_count = 1;
- while (overflow > 1) {
- overflow_bits_count++;
- overflow >>= 1;
- }
-
- int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
- int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
- number >>= overflow_bits_count;
- exponent = overflow_bits_count;
-
- bool zero_tail = true;
- while (true) {
- ++current;
- if (current == end || !isDigit(*current, radix)) break;
- zero_tail = zero_tail && *current == '0';
- exponent += radix_log_2;
- }
-
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value;
- }
-
- int middle_value = (1 << (overflow_bits_count - 1));
- if (dropped_bits > middle_value) {
- number++; // Rounding up.
- } else if (dropped_bits == middle_value) {
- // Rounding to even to consistency with decimals: half-way case rounds
- // up if significant part is odd and down otherwise.
- if ((number & 1) != 0 || !zero_tail) {
- number++; // Rounding up.
- }
- }
-
- // Rounding up may cause overflow.
- if ((number & ((int64_t)1 << kSignificandSize)) != 0) {
- exponent++;
- number >>= 1;
- }
- break;
- }
- ++current;
- } while (current != end);
-
- ASSERT(number < ((int64_t)1 << kSignificandSize));
- ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
-
- *trailing_pointer = current;
-
- if (exponent == 0) {
- if (sign) {
- if (number == 0) return -0.0;
- number = -number;
- }
- return static_cast<double>(number);
- }
-
- ASSERT(number != 0);
- return Double(DiyFp(number, exponent)).value();
-}
-
-
-double StringToDoubleConverter::StringToIeee(
- const char* input,
- int length,
- int* processed_characters_count,
- bool read_as_double) const {
- const char* current = input;
- const char* end = input + length;
-
- *processed_characters_count = 0;
-
- const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0;
- const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0;
- const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0;
- const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0;
-
- // To make sure that iterator dereferencing is valid the following
- // convention is used:
- // 1. Each '++current' statement is followed by check for equality to 'end'.
- // 2. If AdvanceToNonspace returned false then current == end.
- // 3. If 'current' becomes equal to 'end' the function returns or goes to
- // 'parsing_done'.
- // 4. 'current' is not dereferenced after the 'parsing_done' label.
- // 5. Code before 'parsing_done' may rely on 'current != end'.
- if (current == end) return empty_string_value_;
-
- if (allow_leading_spaces || allow_trailing_spaces) {
- if (!AdvanceToNonspace(¤t, end)) {
- *processed_characters_count = current - input;
- return empty_string_value_;
- }
- if (!allow_leading_spaces && (input != current)) {
- // No leading spaces allowed, but AdvanceToNonspace moved forward.
- return junk_string_value_;
- }
- }
-
- // The longest form of simplified number is: "-<significant digits>.1eXXX\0".
- const int kBufferSize = kMaxSignificantDigits + 10;
- char buffer[kBufferSize]; // NOLINT: size is known at compile time.
- int buffer_pos = 0;
-
- // Exponent will be adjusted if insignificant digits of the integer part
- // or insignificant leading zeros of the fractional part are dropped.
- int exponent = 0;
- int significant_digits = 0;
- int insignificant_digits = 0;
- bool nonzero_digit_dropped = false;
-
- bool sign = false;
-
- if (*current == '+' || *current == '-') {
- sign = (*current == '-');
- ++current;
- const char* next_non_space = current;
- // Skip following spaces (if allowed).
- if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_;
- if (!allow_spaces_after_sign && (current != next_non_space)) {
- return junk_string_value_;
- }
- current = next_non_space;
- }
-
- if (infinity_symbol_ != NULL) {
- if (*current == infinity_symbol_[0]) {
- if (!ConsumeSubString(¤t, end, infinity_symbol_)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = current - input;
- return sign ? -Double::Infinity() : Double::Infinity();
- }
- }
-
- if (nan_symbol_ != NULL) {
- if (*current == nan_symbol_[0]) {
- if (!ConsumeSubString(¤t, end, nan_symbol_)) {
- return junk_string_value_;
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
-
- ASSERT(buffer_pos == 0);
- *processed_characters_count = current - input;
- return sign ? -Double::NaN() : Double::NaN();
- }
- }
-
- bool leading_zero = false;
- if (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
-
- leading_zero = true;
-
- // It could be hexadecimal value.
- if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
- ++current;
- if (current == end || !isDigit(*current, 16)) {
- return junk_string_value_; // "0x".
- }
-
- const char* tail_pointer = NULL;
- double result = RadixStringToIeee<4>(current,
- end,
- sign,
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &tail_pointer);
- if (tail_pointer != NULL) {
- if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end);
- *processed_characters_count = tail_pointer - input;
- }
- return result;
- }
-
- // Ignore leading zeros in the integer part.
- while (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
- }
- }
-
- bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0;
-
- // Copy significant digits of the integer part (if any) to the buffer.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- // Will later check if it's an octal in the buffer.
- } else {
- insignificant_digits++; // Move the digit into the exponential part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
- octal = octal && *current < '8';
- ++current;
- if (current == end) goto parsing_done;
- }
-
- if (significant_digits == 0) {
- octal = false;
- }
-
- if (*current == '.') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
-
- ++current;
- if (current == end) {
- if (significant_digits == 0 && !leading_zero) {
- return junk_string_value_;
- } else {
- goto parsing_done;
- }
- }
-
- if (significant_digits == 0) {
- // octal = false;
- // Integer part consists of 0 or is absent. Significant digits start after
- // leading zeros (if any).
- while (*current == '0') {
- ++current;
- if (current == end) {
- *processed_characters_count = current - input;
- return SignedZero(sign);
- }
- exponent--; // Move this 0 into the exponent.
- }
- }
-
- // There is a fractional part.
- // We don't emit a '.', but adjust the exponent instead.
- while (*current >= '0' && *current <= '9') {
- if (significant_digits < kMaxSignificantDigits) {
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos++] = static_cast<char>(*current);
- significant_digits++;
- exponent--;
- } else {
- // Ignore insignificant digits in the fractional part.
- nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
- }
- ++current;
- if (current == end) goto parsing_done;
- }
- }
-
- if (!leading_zero && exponent == 0 && significant_digits == 0) {
- // If leading_zeros is true then the string contains zeros.
- // If exponent < 0 then string was [+-]\.0*...
- // If significant_digits != 0 the string is not equal to 0.
- // Otherwise there are no digits in the string.
- return junk_string_value_;
- }
-
- // Parse exponential part.
- if (*current == 'e' || *current == 'E') {
- if (octal && !allow_trailing_junk) return junk_string_value_;
- if (octal) goto parsing_done;
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- char sign = '+';
- if (*current == '+' || *current == '-') {
- sign = static_cast<char>(*current);
- ++current;
- if (current == end) {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
- }
-
- if (current == end || *current < '0' || *current > '9') {
- if (allow_trailing_junk) {
- goto parsing_done;
- } else {
- return junk_string_value_;
- }
- }
-
- const int max_exponent = INT_MAX / 2;
- ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
- int num = 0;
- do {
- // Check overflow.
- int digit = *current - '0';
- if (num >= max_exponent / 10
- && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
- num = max_exponent;
- } else {
- num = num * 10 + digit;
- }
- ++current;
- } while (current != end && *current >= '0' && *current <= '9');
-
- exponent += (sign == '-' ? -num : num);
- }
-
- if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) {
- return junk_string_value_;
- }
- if (!allow_trailing_junk && AdvanceToNonspace(¤t, end)) {
- return junk_string_value_;
- }
- if (allow_trailing_spaces) {
- AdvanceToNonspace(¤t, end);
- }
-
- parsing_done:
- exponent += insignificant_digits;
-
- if (octal) {
- double result;
- const char* tail_pointer = NULL;
- result = RadixStringToIeee<3>(buffer,
- buffer + buffer_pos,
- sign,
- allow_trailing_junk,
- junk_string_value_,
- read_as_double,
- &tail_pointer);
- ASSERT(tail_pointer != NULL);
- *processed_characters_count = current - input;
- return result;
- }
-
- if (nonzero_digit_dropped) {
- buffer[buffer_pos++] = '1';
- exponent--;
- }
-
- ASSERT(buffer_pos < kBufferSize);
- buffer[buffer_pos] = '\0';
-
- double converted;
- if (read_as_double) {
- converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
- } else {
- converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent);
- }
- *processed_characters_count = current - input;
- return sign? -converted: converted;
-}
-
-} // namespace double_conversion
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/double-conversion/double-conversion.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/double-conversion/double-conversion.h b/ext/kenlm/util/double-conversion/double-conversion.h
deleted file mode 100644
index 1c3387d..0000000
--- a/ext/kenlm/util/double-conversion/double-conversion.h
+++ /dev/null
@@ -1,536 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
-#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_
-
-#include "utils.h"
-
-namespace double_conversion {
-
-class DoubleToStringConverter {
- public:
- // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint
- // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the
- // function returns false.
- static const int kMaxFixedDigitsBeforePoint = 60;
- static const int kMaxFixedDigitsAfterPoint = 60;
-
- // When calling ToExponential with a requested_digits
- // parameter > kMaxExponentialDigits then the function returns false.
- static const int kMaxExponentialDigits = 120;
-
- // When calling ToPrecision with a requested_digits
- // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits
- // then the function returns false.
- static const int kMinPrecisionDigits = 1;
- static const int kMaxPrecisionDigits = 120;
-
- enum Flags {
- NO_FLAGS = 0,
- EMIT_POSITIVE_EXPONENT_SIGN = 1,
- EMIT_TRAILING_DECIMAL_POINT = 2,
- EMIT_TRAILING_ZERO_AFTER_POINT = 4,
- UNIQUE_ZERO = 8
- };
-
- // Flags should be a bit-or combination of the possible Flags-enum.
- // - NO_FLAGS: no special flags.
- // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent
- // form, emits a '+' for positive exponents. Example: 1.2e+2.
- // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is
- // converted into decimal format then a trailing decimal point is appended.
- // Example: 2345.0 is converted to "2345.".
- // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point
- // emits a trailing '0'-character. This flag requires the
- // EXMIT_TRAILING_DECIMAL_POINT flag.
- // Example: 2345.0 is converted to "2345.0".
- // - UNIQUE_ZERO: "-0.0" is converted to "0.0".
- //
- // Infinity symbol and nan_symbol provide the string representation for these
- // special values. If the string is NULL and the special value is encountered
- // then the conversion functions return false.
- //
- // The exponent_character is used in exponential representations. It is
- // usually 'e' or 'E'.
- //
- // When converting to the shortest representation the converter will
- // represent input numbers in decimal format if they are in the interval
- // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[
- // (lower boundary included, greater boundary excluded).
- // Example: with decimal_in_shortest_low = -6 and
- // decimal_in_shortest_high = 21:
- // ToShortest(0.000001) -> "0.000001"
- // ToShortest(0.0000001) -> "1e-7"
- // ToShortest(111111111111111111111.0) -> "111111111111111110000"
- // ToShortest(100000000000000000000.0) -> "100000000000000000000"
- // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
- //
- // When converting to precision mode the converter may add
- // max_leading_padding_zeroes before returning the number in exponential
- // format.
- // Example with max_leading_padding_zeroes_in_precision_mode = 6.
- // ToPrecision(0.0000012345, 2) -> "0.0000012"
- // ToPrecision(0.00000012345, 2) -> "1.2e-7"
- // Similarily the converter may add up to
- // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
- // returning an exponential representation. A zero added by the
- // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
- // ToPrecision(230.0, 2) -> "230"
- // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
- // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
- DoubleToStringConverter(int flags,
- const char* infinity_symbol,
- const char* nan_symbol,
- char exponent_character,
- int decimal_in_shortest_low,
- int decimal_in_shortest_high,
- int max_leading_padding_zeroes_in_precision_mode,
- int max_trailing_padding_zeroes_in_precision_mode)
- : flags_(flags),
- infinity_symbol_(infinity_symbol),
- nan_symbol_(nan_symbol),
- exponent_character_(exponent_character),
- decimal_in_shortest_low_(decimal_in_shortest_low),
- decimal_in_shortest_high_(decimal_in_shortest_high),
- max_leading_padding_zeroes_in_precision_mode_(
- max_leading_padding_zeroes_in_precision_mode),
- max_trailing_padding_zeroes_in_precision_mode_(
- max_trailing_padding_zeroes_in_precision_mode) {
- // When 'trailing zero after the point' is set, then 'trailing point'
- // must be set too.
- ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) ||
- !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0));
- }
-
- // Returns a converter following the EcmaScript specification.
- static const DoubleToStringConverter& EcmaScriptConverter();
-
- // Computes the shortest string of digits that correctly represent the input
- // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high
- // (see constructor) it then either returns a decimal representation, or an
- // exponential representation.
- // Example with decimal_in_shortest_low = -6,
- // decimal_in_shortest_high = 21,
- // EMIT_POSITIVE_EXPONENT_SIGN activated, and
- // EMIT_TRAILING_DECIMAL_POINT deactived:
- // ToShortest(0.000001) -> "0.000001"
- // ToShortest(0.0000001) -> "1e-7"
- // ToShortest(111111111111111111111.0) -> "111111111111111110000"
- // ToShortest(100000000000000000000.0) -> "100000000000000000000"
- // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21"
- //
- // Note: the conversion may round the output if the returned string
- // is accurate enough to uniquely identify the input-number.
- // For example the most precise representation of the double 9e59 equals
- // "899999999999999918767229449717619953810131273674690656206848", but
- // the converter will return the shorter (but still correct) "9e59".
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except when the input value is special and no infinity_symbol or
- // nan_symbol has been given to the constructor.
- bool ToShortest(double value, StringBuilder* result_builder) const {
- return ToShortestIeeeNumber(value, result_builder, SHORTEST);
- }
-
- // Same as ToShortest, but for single-precision floats.
- bool ToShortestSingle(float value, StringBuilder* result_builder) const {
- return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE);
- }
-
-
- // Computes a decimal representation with a fixed number of digits after the
- // decimal point. The last emitted digit is rounded.
- //
- // Examples:
- // ToFixed(3.12, 1) -> "3.1"
- // ToFixed(3.1415, 3) -> "3.142"
- // ToFixed(1234.56789, 4) -> "1234.5679"
- // ToFixed(1.23, 5) -> "1.23000"
- // ToFixed(0.1, 4) -> "0.1000"
- // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00"
- // ToFixed(0.1, 30) -> "0.100000000000000005551115123126"
- // ToFixed(0.1, 17) -> "0.10000000000000001"
- //
- // If requested_digits equals 0, then the tail of the result depends on
- // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT.
- // Examples, for requested_digits == 0,
- // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be
- // - false and false: then 123.45 -> 123
- // 0.678 -> 1
- // - true and false: then 123.45 -> 123.
- // 0.678 -> 1.
- // - true and true: then 123.45 -> 123.0
- // 0.678 -> 1.0
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- // - 'value' > 10^kMaxFixedDigitsBeforePoint, or
- // - 'requested_digits' > kMaxFixedDigitsAfterPoint.
- // The last two conditions imply that the result will never contain more than
- // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters
- // (one additional character for the sign, and one for the decimal point).
- bool ToFixed(double value,
- int requested_digits,
- StringBuilder* result_builder) const;
-
- // Computes a representation in exponential format with requested_digits
- // after the decimal point. The last emitted digit is rounded.
- // If requested_digits equals -1, then the shortest exponential representation
- // is computed.
- //
- // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and
- // exponent_character set to 'e'.
- // ToExponential(3.12, 1) -> "3.1e0"
- // ToExponential(5.0, 3) -> "5.000e0"
- // ToExponential(0.001, 2) -> "1.00e-3"
- // ToExponential(3.1415, -1) -> "3.1415e0"
- // ToExponential(3.1415, 4) -> "3.1415e0"
- // ToExponential(3.1415, 3) -> "3.142e0"
- // ToExponential(123456789000000, 3) -> "1.235e14"
- // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30"
- // ToExponential(1000000000000000019884624838656.0, 32) ->
- // "1.00000000000000001988462483865600e30"
- // ToExponential(1234, 0) -> "1e3"
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- // - 'requested_digits' > kMaxExponentialDigits.
- // The last condition implies that the result will never contain more than
- // kMaxExponentialDigits + 8 characters (the sign, the digit before the
- // decimal point, the decimal point, the exponent character, the
- // exponent's sign, and at most 3 exponent digits).
- bool ToExponential(double value,
- int requested_digits,
- StringBuilder* result_builder) const;
-
- // Computes 'precision' leading digits of the given 'value' and returns them
- // either in exponential or decimal format, depending on
- // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the
- // constructor).
- // The last computed digit is rounded.
- //
- // Example with max_leading_padding_zeroes_in_precision_mode = 6.
- // ToPrecision(0.0000012345, 2) -> "0.0000012"
- // ToPrecision(0.00000012345, 2) -> "1.2e-7"
- // Similarily the converter may add up to
- // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid
- // returning an exponential representation. A zero added by the
- // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 1:
- // ToPrecision(230.0, 2) -> "230"
- // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT.
- // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT.
- // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no
- // EMIT_TRAILING_ZERO_AFTER_POINT:
- // ToPrecision(123450.0, 6) -> "123450"
- // ToPrecision(123450.0, 5) -> "123450"
- // ToPrecision(123450.0, 4) -> "123500"
- // ToPrecision(123450.0, 3) -> "123000"
- // ToPrecision(123450.0, 2) -> "1.2e5"
- //
- // Returns true if the conversion succeeds. The conversion always succeeds
- // except for the following cases:
- // - the input value is special and no infinity_symbol or nan_symbol has
- // been provided to the constructor,
- // - precision < kMinPericisionDigits
- // - precision > kMaxPrecisionDigits
- // The last condition implies that the result will never contain more than
- // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the
- // exponent character, the exponent's sign, and at most 3 exponent digits).
- bool ToPrecision(double value,
- int precision,
- StringBuilder* result_builder) const;
-
- enum DtoaMode {
- // Produce the shortest correct representation.
- // For example the output of 0.299999999999999988897 is (the less accurate
- // but correct) 0.3.
- SHORTEST,
- // Same as SHORTEST, but for single-precision floats.
- SHORTEST_SINGLE,
- // Produce a fixed number of digits after the decimal point.
- // For instance fixed(0.1, 4) becomes 0.1000
- // If the input number is big, the output will be big.
- FIXED,
- // Fixed number of digits (independent of the decimal point).
- PRECISION
- };
-
- // The maximal number of digits that are needed to emit a double in base 10.
- // A higher precision can be achieved by using more digits, but the shortest
- // accurate representation of any double will never use more digits than
- // kBase10MaximalLength.
- // Note that DoubleToAscii null-terminates its input. So the given buffer
- // should be at least kBase10MaximalLength + 1 characters long.
- static const int kBase10MaximalLength = 17;
-
- // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or
- // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v'
- // after it has been casted to a single-precision float. That is, in this
- // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity.
- //
- // The result should be interpreted as buffer * 10^(point-length).
- //
- // The output depends on the given mode:
- // - SHORTEST: produce the least amount of digits for which the internal
- // identity requirement is still satisfied. If the digits are printed
- // (together with the correct exponent) then reading this number will give
- // 'v' again. The buffer will choose the representation that is closest to
- // 'v'. If there are two at the same distance, than the one farther away
- // from 0 is chosen (halfway cases - ending with 5 - are rounded up).
- // In this mode the 'requested_digits' parameter is ignored.
- // - SHORTEST_SINGLE: same as SHORTEST but with single-precision.
- // - FIXED: produces digits necessary to print a given number with
- // 'requested_digits' digits after the decimal point. The produced digits
- // might be too short in which case the caller has to fill the remainder
- // with '0's.
- // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2.
- // Halfway cases are rounded towards +/-Infinity (away from 0). The call
- // toFixed(0.15, 2) thus returns buffer="2", point=0.
- // The returned buffer may contain digits that would be truncated from the
- // shortest representation of the input.
- // - PRECISION: produces 'requested_digits' where the first digit is not '0'.
- // Even though the length of produced digits usually equals
- // 'requested_digits', the function is allowed to return fewer digits, in
- // which case the caller has to fill the missing digits with '0's.
- // Halfway cases are again rounded away from 0.
- // DoubleToAscii expects the given buffer to be big enough to hold all
- // digits and a terminating null-character. In SHORTEST-mode it expects a
- // buffer of at least kBase10MaximalLength + 1. In all other modes the
- // requested_digits parameter and the padding-zeroes limit the size of the
- // output. Don't forget the decimal point, the exponent character and the
- // terminating null-character when computing the maximal output size.
- // The given length is only used in debug mode to ensure the buffer is big
- // enough.
- static void DoubleToAscii(double v,
- DtoaMode mode,
- int requested_digits,
- char* buffer,
- int buffer_length,
- bool* sign,
- int* length,
- int* point);
-
- private:
- // Implementation for ToShortest and ToShortestSingle.
- bool ToShortestIeeeNumber(double value,
- StringBuilder* result_builder,
- DtoaMode mode) const;
-
- // If the value is a special value (NaN or Infinity) constructs the
- // corresponding string using the configured infinity/nan-symbol.
- // If either of them is NULL or the value is not special then the
- // function returns false.
- bool HandleSpecialValues(double value, StringBuilder* result_builder) const;
- // Constructs an exponential representation (i.e. 1.234e56).
- // The given exponent assumes a decimal point after the first decimal digit.
- void CreateExponentialRepresentation(const char* decimal_digits,
- int length,
- int exponent,
- StringBuilder* result_builder) const;
- // Creates a decimal representation (i.e 1234.5678).
- void CreateDecimalRepresentation(const char* decimal_digits,
- int length,
- int decimal_point,
- int digits_after_point,
- StringBuilder* result_builder) const;
-
- const int flags_;
- const char* const infinity_symbol_;
- const char* const nan_symbol_;
- const char exponent_character_;
- const int decimal_in_shortest_low_;
- const int decimal_in_shortest_high_;
- const int max_leading_padding_zeroes_in_precision_mode_;
- const int max_trailing_padding_zeroes_in_precision_mode_;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter);
-};
-
-
-class StringToDoubleConverter {
- public:
- // Enumeration for allowing octals and ignoring junk when converting
- // strings to numbers.
- enum Flags {
- NO_FLAGS = 0,
- ALLOW_HEX = 1,
- ALLOW_OCTALS = 2,
- ALLOW_TRAILING_JUNK = 4,
- ALLOW_LEADING_SPACES = 8,
- ALLOW_TRAILING_SPACES = 16,
- ALLOW_SPACES_AFTER_SIGN = 32
- };
-
- // Flags should be a bit-or combination of the possible Flags-enum.
- // - NO_FLAGS: no special flags.
- // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers.
- // Ex: StringToDouble("0x1234") -> 4660.0
- // In StringToDouble("0x1234.56") the characters ".56" are trailing
- // junk. The result of the call is hence dependent on
- // the ALLOW_TRAILING_JUNK flag and/or the junk value.
- // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK,
- // the string will not be parsed as "0" followed by junk.
- //
- // - ALLOW_OCTALS: recognizes the prefix "0" for octals:
- // If a sequence of octal digits starts with '0', then the number is
- // read as octal integer. Octal numbers may only be integers.
- // Ex: StringToDouble("01234") -> 668.0
- // StringToDouble("012349") -> 12349.0 // Not a sequence of octal
- // // digits.
- // In StringToDouble("01234.56") the characters ".56" are trailing
- // junk. The result of the call is hence dependent on
- // the ALLOW_TRAILING_JUNK flag and/or the junk value.
- // In StringToDouble("01234e56") the characters "e56" are trailing
- // junk, too.
- // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of
- // a double literal.
- // - ALLOW_LEADING_SPACES: skip over leading spaces.
- // - ALLOW_TRAILING_SPACES: ignore trailing spaces.
- // - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign.
- // Ex: StringToDouble("- 123.2") -> -123.2.
- // StringToDouble("+ 123.2") -> 123.2
- //
- // empty_string_value is returned when an empty string is given as input.
- // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string
- // containing only spaces is converted to the 'empty_string_value', too.
- //
- // junk_string_value is returned when
- // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not
- // part of a double-literal) is found.
- // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a
- // double literal.
- //
- // infinity_symbol and nan_symbol are strings that are used to detect
- // inputs that represent infinity and NaN. They can be null, in which case
- // they are ignored.
- // The conversion routine first reads any possible signs. Then it compares the
- // following character of the input-string with the first character of
- // the infinity, and nan-symbol. If either matches, the function assumes, that
- // a match has been found, and expects the following input characters to match
- // the remaining characters of the special-value symbol.
- // This means that the following restrictions apply to special-value symbols:
- // - they must not start with signs ('+', or '-'),
- // - they must not have the same first character.
- // - they must not start with digits.
- //
- // Examples:
- // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK,
- // empty_string_value = 0.0,
- // junk_string_value = NaN,
- // infinity_symbol = "infinity",
- // nan_symbol = "nan":
- // StringToDouble("0x1234") -> 4660.0.
- // StringToDouble("0x1234K") -> 4660.0.
- // StringToDouble("") -> 0.0 // empty_string_value.
- // StringToDouble(" ") -> NaN // junk_string_value.
- // StringToDouble(" 1") -> NaN // junk_string_value.
- // StringToDouble("0x") -> NaN // junk_string_value.
- // StringToDouble("-123.45") -> -123.45.
- // StringToDouble("--123.45") -> NaN // junk_string_value.
- // StringToDouble("123e45") -> 123e45.
- // StringToDouble("123E45") -> 123e45.
- // StringToDouble("123e+45") -> 123e45.
- // StringToDouble("123E-45") -> 123e-45.
- // StringToDouble("123e") -> 123.0 // trailing junk ignored.
- // StringToDouble("123e-") -> 123.0 // trailing junk ignored.
- // StringToDouble("+NaN") -> NaN // NaN string literal.
- // StringToDouble("-infinity") -> -inf. // infinity literal.
- // StringToDouble("Infinity") -> NaN // junk_string_value.
- //
- // flags = ALLOW_OCTAL | ALLOW_LEADING_SPACES,
- // empty_string_value = 0.0,
- // junk_string_value = NaN,
- // infinity_symbol = NULL,
- // nan_symbol = NULL:
- // StringToDouble("0x1234") -> NaN // junk_string_value.
- // StringToDouble("01234") -> 668.0.
- // StringToDouble("") -> 0.0 // empty_string_value.
- // StringToDouble(" ") -> 0.0 // empty_string_value.
- // StringToDouble(" 1") -> 1.0
- // StringToDouble("0x") -> NaN // junk_string_value.
- // StringToDouble("0123e45") -> NaN // junk_string_value.
- // StringToDouble("01239E45") -> 1239e45.
- // StringToDouble("-infinity") -> NaN // junk_string_value.
- // StringToDouble("NaN") -> NaN // junk_string_value.
- StringToDoubleConverter(int flags,
- double empty_string_value,
- double junk_string_value,
- const char* infinity_symbol,
- const char* nan_symbol)
- : flags_(flags),
- empty_string_value_(empty_string_value),
- junk_string_value_(junk_string_value),
- infinity_symbol_(infinity_symbol),
- nan_symbol_(nan_symbol) {
- }
-
- // Performs the conversion.
- // The output parameter 'processed_characters_count' is set to the number
- // of characters that have been processed to read the number.
- // Spaces than are processed with ALLOW_{LEADING|TRAILING}_SPACES are included
- // in the 'processed_characters_count'. Trailing junk is never included.
- double StringToDouble(const char* buffer,
- int length,
- int* processed_characters_count) const {
- return StringToIeee(buffer, length, processed_characters_count, true);
- }
-
- // Same as StringToDouble but reads a float.
- // Note that this is not equivalent to static_cast<float>(StringToDouble(...))
- // due to potential double-rounding.
- float StringToFloat(const char* buffer,
- int length,
- int* processed_characters_count) const {
- return static_cast<float>(StringToIeee(buffer, length,
- processed_characters_count, false));
- }
-
- private:
- const int flags_;
- const double empty_string_value_;
- const double junk_string_value_;
- const char* const infinity_symbol_;
- const char* const nan_symbol_;
-
- double StringToIeee(const char* buffer,
- int length,
- int* processed_characters_count,
- bool read_as_double) const;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter);
-};
-
-} // namespace double_conversion
-
-#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_