You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/05/24 00:29:49 UTC
arrow git commit: ARROW-1061: [C++] Harden decimal parsing against
invalid strings
Repository: arrow
Updated Branches:
refs/heads/master bc70fae14 -> 3d8b1906b
ARROW-1061: [C++] Harden decimal parsing against invalid strings
Author: Phillip Cloud <cp...@gmail.com>
Closes #708 from cpcloud/ARROW-1061 and squashes the following commits:
c93d1db8 [Phillip Cloud] ARROW-1061: [C++] Harden decimal parsing against invalid strings
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3d8b1906
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3d8b1906
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3d8b1906
Branch: refs/heads/master
Commit: 3d8b1906ba7b0a6c856e8f3aeb54621489080794
Parents: bc70fae
Author: Phillip Cloud <cp...@gmail.com>
Authored: Tue May 23 20:29:44 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Tue May 23 20:29:44 2017 -0400
----------------------------------------------------------------------
cpp/src/arrow/util/decimal-test.cc | 80 +++++++++++++++++++++++++++++++++
cpp/src/arrow/util/decimal.cc | 75 ++++++++++++++++++++++++-------
2 files changed, 139 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/3d8b1906/cpp/src/arrow/util/decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
index 5d95c2c..72107a2 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -54,6 +54,29 @@ TYPED_TEST(DecimalTest, TestFromString) {
ASSERT_EQ(scale, 5);
}
+TEST(DecimalTest, TestStringStartingWithPlus) {
+ std::string plus_value("+234.234");
+ Decimal32 out;
+ int scale;
+ int precision;
+ ASSERT_OK(FromString(plus_value, &out, &precision, &scale));
+ ASSERT_EQ(234234, out.value);
+ ASSERT_EQ(6, precision);
+ ASSERT_EQ(3, scale);
+}
+
+TEST(DecimalTest, TestStringStartingWithPlus128) {
+ std::string plus_value("+2342394230592.232349023094");
+ decimal::int128_t expected_value("2342394230592232349023094");
+ Decimal128 out;
+ int scale;
+ int precision;
+ ASSERT_OK(FromString(plus_value, &out, &precision, &scale));
+ ASSERT_EQ(expected_value, out.value);
+ ASSERT_EQ(25, precision);
+ ASSERT_EQ(12, scale);
+}
+
TEST(DecimalTest, TestStringToInt32) {
int32_t value = 0;
StringToInteger("123", "456", 1, &value);
@@ -160,6 +183,63 @@ TEST(DecimalTest, TestDecimal128StringAndBytesRoundTrip) {
ASSERT_EQ(expected.value, result.value);
}
+TEST(DecimalTest, TestInvalidInputMinus) {
+ std::string invalid_value("-");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputDot) {
+ std::string invalid_value("0.0.0");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputEmbeddedMinus) {
+ std::string invalid_value("0-13-32");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputSingleChar) {
+ std::string invalid_value("a");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithValidSubstring) {
+ std::string invalid_value("-23092.235-");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ auto msg = status.message();
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithMinusPlus) {
+ std::string invalid_value("-+23092.235");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithPlusMinus) {
+ std::string invalid_value("+-23092.235");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
+TEST(DecimalTest, TestInvalidInputWithLeadingZeros) {
+ std::string invalid_value("00a");
+ Decimal32 out;
+ Status status = decimal::FromString(invalid_value, &out);
+ ASSERT_RAISES(Invalid, status);
+}
+
template <typename T>
class DecimalZerosTest : public ::testing::Test {};
TYPED_TEST_CASE(DecimalZerosTest, DecimalTypes);
http://git-wip-us.apache.org/repos/asf/arrow/blob/3d8b1906/cpp/src/arrow/util/decimal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 734df13..72ede35 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -29,18 +29,27 @@ ARROW_EXPORT Status FromString(
}
int8_t sign = 1;
- auto charp = s.c_str();
- auto end = charp + s.length();
+ std::string::const_iterator charp = s.cbegin();
+ std::string::const_iterator end = s.cend();
- if (*charp == '+' || *charp == '-') {
- if (*charp == '-') { sign = -1; }
+ char first_char = *charp;
+ if (first_char == '+' || first_char == '-') {
+ if (first_char == '-') { sign = -1; }
++charp;
}
- auto numeric_string_start = charp;
+ if (charp == end) {
+ std::stringstream ss;
+ ss << "Single character: '" << first_char << "' is not a valid decimal value";
+ return Status::Invalid(ss.str());
+ }
+
+ std::string::const_iterator numeric_string_start = charp;
+
+ DCHECK_LT(charp, end);
// skip leading zeros
- while (*charp == '0') {
+ while (charp != end && *charp == '0') {
++charp;
}
@@ -59,25 +68,59 @@ ARROW_EXPORT Status FromString(
return Status::OK();
}
- auto whole_part_start = charp;
- while (isdigit(*charp)) {
+ std::string::const_iterator whole_part_start = charp;
+
+ while (charp != end && isdigit(*charp)) {
++charp;
}
- auto whole_part_end = charp;
+
+ std::string::const_iterator whole_part_end = charp;
std::string whole_part(whole_part_start, whole_part_end);
- if (*charp == '.') {
+ if (charp != end && *charp == '.') {
++charp;
+
+ if (charp == end) {
+ return Status::Invalid(
+ "Decimal point must be followed by at least one base ten digit. Reached the "
+ "end of the string.");
+ }
+
+ if (!isdigit(*charp)) {
+ std::stringstream ss;
+ ss << "Decimal point must be followed by a base ten digit. Found '" << *charp
+ << "'";
+ return Status::Invalid(ss.str());
+ }
} else {
- // no decimal point
- DCHECK_EQ(charp, end);
+ if (charp != end) {
+ std::stringstream ss;
+ ss << "Expected base ten digit or decimal point but found '" << *charp
+ << "' instead.";
+ return Status::Invalid(ss.str());
+ }
}
- auto fractional_part_start = charp;
- while (isdigit(*charp)) {
- ++charp;
+ std::string::const_iterator fractional_part_start = charp;
+
+ // The rest must be digits, because if we have a decimal point it must be followed by
+ // digits
+ if (charp != end) {
+ while (charp != end && isdigit(*charp)) {
+ ++charp;
+ }
+
+ // The while loop has ended before the end of the string which means we've hit a
+ // character that isn't a base ten digit
+ if (charp != end) {
+ std::stringstream ss;
+ ss << "Found non base ten digit character '" << *charp
+ << "' before the end of the string";
+ return Status::Invalid(ss.str());
+ }
}
- auto fractional_part_end = charp;
+
+ std::string::const_iterator fractional_part_end = charp;
std::string fractional_part(fractional_part_start, fractional_part_end);
if (precision != nullptr) {