You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/24 00:29:49 UTC

arrow git commit: ARROW-1061: [C++] Harden decimal parsing against invalid strings

Repository: arrow
Updated Branches:
  refs/heads/master bc70fae14 -> 3d8b1906b


ARROW-1061: [C++] Harden decimal parsing against invalid strings

Author: Phillip Cloud <cp...@gmail.com>

Closes #708 from cpcloud/ARROW-1061 and squashes the following commits:

c93d1db8 [Phillip Cloud] ARROW-1061: [C++] Harden decimal parsing against invalid strings


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3d8b1906
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3d8b1906
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3d8b1906

Branch: refs/heads/master
Commit: 3d8b1906ba7b0a6c856e8f3aeb54621489080794
Parents: bc70fae
Author: Phillip Cloud <cp...@gmail.com>
Authored: Tue May 23 20:29:44 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue May 23 20:29:44 2017 -0400

----------------------------------------------------------------------
 cpp/src/arrow/util/decimal-test.cc | 80 +++++++++++++++++++++++++++++++++
 cpp/src/arrow/util/decimal.cc      | 75 ++++++++++++++++++++++++-------
 2 files changed, 139 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/3d8b1906/cpp/src/arrow/util/decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
index 5d95c2c..72107a2 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -54,6 +54,29 @@ TYPED_TEST(DecimalTest, TestFromString) {
   ASSERT_EQ(scale, 5);
 }
 
+TEST(DecimalTest, TestStringStartingWithPlus) {
+  std::string plus_value("+234.234");
+  Decimal32 out;
+  int scale;
+  int precision;
+  ASSERT_OK(FromString(plus_value, &out, &precision, &scale));
+  ASSERT_EQ(234234, out.value);
+  ASSERT_EQ(6, precision);
+  ASSERT_EQ(3, scale);
+}
+
+TEST(DecimalTest, TestStringStartingWithPlus128) {
+  std::string plus_value("+2342394230592.232349023094");
+  decimal::int128_t expected_value("2342394230592232349023094");
+  Decimal128 out;
+  int scale;
+  int precision;
+  ASSERT_OK(FromString(plus_value, &out, &precision, &scale));
+  ASSERT_EQ(expected_value, out.value);
+  ASSERT_EQ(25, precision);
+  ASSERT_EQ(12, scale);
+}
+
 TEST(DecimalTest, TestStringToInt32) {
   int32_t value = 0;
   StringToInteger("123", "456", 1, &value);
@@ -160,6 +183,63 @@ TEST(DecimalTest, TestDecimal128StringAndBytesRoundTrip) {
   ASSERT_EQ(expected.value, result.value);
 }
 
+TEST(DecimalTest, TestInvalidInputMinus) {
+  std::string invalid_value("-");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputDot) {
+  std::string invalid_value("0.0.0");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputEmbeddedMinus) {
+  std::string invalid_value("0-13-32");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputSingleChar) {
+  std::string invalid_value("a");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithValidSubstring) {
+  std::string invalid_value("-23092.235-");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  auto msg = status.message();
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithMinusPlus) {
+  std::string invalid_value("-+23092.235");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithPlusMinus) {
+  std::string invalid_value("+-23092.235");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithLeadingZeros) {
+  std::string invalid_value("00a");
+  Decimal32 out;
+  Status status = decimal::FromString(invalid_value, &out);
+  ASSERT_RAISES(Invalid, status);
+}
+
 template <typename T>
 class DecimalZerosTest : public ::testing::Test {};
 TYPED_TEST_CASE(DecimalZerosTest, DecimalTypes);

http://git-wip-us.apache.org/repos/asf/arrow/blob/3d8b1906/cpp/src/arrow/util/decimal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 734df13..72ede35 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -29,18 +29,27 @@ ARROW_EXPORT Status FromString(
   }
 
   int8_t sign = 1;
-  auto charp = s.c_str();
-  auto end = charp + s.length();
+  std::string::const_iterator charp = s.cbegin();
+  std::string::const_iterator end = s.cend();
 
-  if (*charp == '+' || *charp == '-') {
-    if (*charp == '-') { sign = -1; }
+  char first_char = *charp;
+  if (first_char == '+' || first_char == '-') {
+    if (first_char == '-') { sign = -1; }
     ++charp;
   }
 
-  auto numeric_string_start = charp;
+  if (charp == end) {
+    std::stringstream ss;
+    ss << "Single character: '" << first_char << "' is not a valid decimal value";
+    return Status::Invalid(ss.str());
+  }
+
+  std::string::const_iterator numeric_string_start = charp;
+
+  DCHECK_LT(charp, end);
 
   // skip leading zeros
-  while (*charp == '0') {
+  while (charp != end && *charp == '0') {
     ++charp;
   }
 
@@ -59,25 +68,59 @@ ARROW_EXPORT Status FromString(
     return Status::OK();
   }
 
-  auto whole_part_start = charp;
-  while (isdigit(*charp)) {
+  std::string::const_iterator whole_part_start = charp;
+
+  while (charp != end && isdigit(*charp)) {
     ++charp;
   }
-  auto whole_part_end = charp;
+
+  std::string::const_iterator whole_part_end = charp;
   std::string whole_part(whole_part_start, whole_part_end);
 
-  if (*charp == '.') {
+  if (charp != end && *charp == '.') {
     ++charp;
+
+    if (charp == end) {
+      return Status::Invalid(
+          "Decimal point must be followed by at least one base ten digit. Reached the "
+          "end of the string.");
+    }
+
+    if (!isdigit(*charp)) {
+      std::stringstream ss;
+      ss << "Decimal point must be followed by a base ten digit. Found '" << *charp
+         << "'";
+      return Status::Invalid(ss.str());
+    }
   } else {
-    // no decimal point
-    DCHECK_EQ(charp, end);
+    if (charp != end) {
+      std::stringstream ss;
+      ss << "Expected base ten digit or decimal point but found '" << *charp
+         << "' instead.";
+      return Status::Invalid(ss.str());
+    }
   }
 
-  auto fractional_part_start = charp;
-  while (isdigit(*charp)) {
-    ++charp;
+  std::string::const_iterator fractional_part_start = charp;
+
+  // The rest must be digits, because if we have a decimal point it must be followed by
+  // digits
+  if (charp != end) {
+    while (charp != end && isdigit(*charp)) {
+      ++charp;
+    }
+
+    // The while loop has ended before the end of the string which means we've hit a
+    // character that isn't a base ten digit
+    if (charp != end) {
+      std::stringstream ss;
+      ss << "Found non base ten digit character '" << *charp
+         << "' before the end of the string";
+      return Status::Invalid(ss.str());
+    }
   }
-  auto fractional_part_end = charp;
+
+  std::string::const_iterator fractional_part_end = charp;
   std::string fractional_part(fractional_part_start, fractional_part_end);
 
   if (precision != nullptr) {