You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/19 21:12:00 UTC
arrow git commit: ARROW-858: Remove boost_regex from arrow dependencies
Repository: arrow
Updated Branches:
refs/heads/master 391242a17 -> 74f89cfbe
ARROW-858: Remove boost_regex from arrow dependencies
Author: Phillip Cloud <cp...@gmail.com>
Closes #567 from cpcloud/decimal-no-regex and squashes the following commits:
b5c59bd [Phillip Cloud] ARROW-858: Remove boost_regex from arrow dependencies
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/74f89cfb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/74f89cfb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/74f89cfb
Branch: refs/heads/master
Commit: 74f89cfbe0793043eb579ec30b3d6467b0ad9af2
Parents: 391242a
Author: Phillip Cloud <cp...@gmail.com>
Authored: Wed Apr 19 17:11:51 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Apr 19 17:11:51 2017 -0400
----------------------------------------------------------------------
.travis.yml | 1 -
ci/travis_script_python.sh | 1 +
cpp/CMakeLists.txt | 22 ++------
cpp/README.md | 1 -
cpp/src/arrow/ipc/CMakeLists.txt | 4 +-
cpp/src/arrow/ipc/ipc-read-write-test.cc | 4 +-
cpp/src/arrow/python/CMakeLists.txt | 3 +-
cpp/src/arrow/util/decimal-test.cc | 40 +++++++++++++++
cpp/src/arrow/util/decimal.cc | 73 +++++++++++++++++++++------
9 files changed, 108 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 824f62b..6ebebd4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ addons:
- valgrind
- libboost-dev
- libboost-filesystem-dev
- - libboost-regex-dev
- libboost-system-dev
- libjemalloc-dev
- gtk-doc-tools
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index bde1fd7..c1426da 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -22,6 +22,7 @@ pushd $ARROW_PYTHON_DIR
export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
build_parquet_cpp() {
+ export PARQUET_ARROW_VERSION=$(git rev-parse HEAD)
conda create -y -q -p $PARQUET_HOME python=3.6
source activate $PARQUET_HOME
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c1cf785..81e4c90 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -410,19 +410,16 @@ if (ARROW_BOOST_USE_SHARED)
add_definitions(-DBOOST_ALL_DYN_LINK)
endif()
- find_package(Boost COMPONENTS system filesystem regex REQUIRED)
+ find_package(Boost COMPONENTS system filesystem REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
- set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
- set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_shared)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
- set(BOOST_REGEX_LIBRARY boost_regex_shared)
else()
# Find static boost headers and libs
# TODO Differentiate here between release and debug builds
@@ -431,15 +428,12 @@ else()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
- set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
- set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_static)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
- set(BOOST_REGEX_LIBRARY boost_regex_static)
endif()
message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
@@ -453,11 +447,7 @@ ADD_THIRDPARTY_LIB(boost_filesystem
STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
-ADD_THIRDPARTY_LIB(boost_regex
- STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}"
- SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}")
-
-SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex)
+SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
@@ -758,8 +748,7 @@ set(ARROW_MIN_TEST_LIBS
arrow_static
gtest
gtest_main
- ${ARROW_BASE_LIBS}
- ${BOOST_REGEX_LIBRARY})
+ ${ARROW_BASE_LIBS})
if (APPLE)
set(ARROW_MIN_TEST_LIBS
@@ -777,8 +766,7 @@ set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
set(ARROW_BENCHMARK_LINK_LIBS
arrow_static
arrow_benchmark_main
- ${ARROW_BASE_LIBS}
- ${BOOST_REGEX_LIBRARY})
+ ${ARROW_BASE_LIBS})
############################################################
# "make ctags" target
@@ -875,7 +863,7 @@ endif()
############################################################
set(ARROW_LINK_LIBS
- ${BOOST_REGEX_LIBRARY})
+ )
set(ARROW_STATIC_LINK_LIBS)
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
index 339b6b4..69c6950 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -31,7 +31,6 @@ On Ubuntu/Debian you can install the requirements with:
sudo apt-get install cmake \
libboost-dev \
libboost-filesystem-dev \
- libboost-regex-dev \
libboost-system-dev
```
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 37b4553..fc1d53e 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -91,14 +91,12 @@ if(MSVC)
set(UTIL_LINK_LIBS
arrow_static
${BOOST_FILESYSTEM_LIBRARY}
- ${BOOST_SYSTEM_LIBRARY}
- ${BOOST_REGEX_LIBRARY})
+ ${BOOST_SYSTEM_LIBRARY})
else()
set(UTIL_LINK_LIBS
arrow_static
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_SYSTEM_LIBRARY}
- ${BOOST_REGEX_LIBRARY}
dl)
endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/ipc/ipc-read-write-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc
index b39136e..cd793e0 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc
@@ -322,8 +322,8 @@ TEST_F(TestWriteRecordBatch, SliceTruncatesBuffers) {
std::vector<int32_t> type_ids(a0->length());
std::shared_ptr<Buffer> ids_buffer;
ASSERT_OK(test::CopyBufferFromVector(type_ids, &ids_buffer));
- a1 = std::make_shared<UnionArray>(
- union_type, a0->length(), struct_children, ids_buffer);
+ a1 =
+ std::make_shared<UnionArray>(union_type, a0->length(), struct_children, ids_buffer);
CheckArray(a1);
// Dense union
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index 5c2b588..c5cbc50 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -35,8 +35,7 @@ endif()
set(ARROW_PYTHON_MIN_TEST_LIBS
arrow_python_test_main
arrow_python_static
- arrow_static
- ${BOOST_REGEX_LIBRARY})
+ arrow_static)
set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/util/decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
index dcaa9af..5d95c2c 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -159,5 +159,45 @@ TEST(DecimalTest, TestDecimal128StringAndBytesRoundTrip) {
ASSERT_EQ(expected.value, result.value);
}
+
+template <typename T>
+class DecimalZerosTest : public ::testing::Test {};
+TYPED_TEST_CASE(DecimalZerosTest, DecimalTypes);
+
+TYPED_TEST(DecimalZerosTest, LeadingZerosNoDecimalPoint) {
+ std::string string_value("0000000");
+ Decimal<TypeParam> d;
+ int precision;
+ int scale;
+ FromString(string_value, &d, &precision, &scale);
+ ASSERT_EQ(precision, 7);
+ ASSERT_EQ(scale, 0);
+ ASSERT_EQ(d.value, 0);
+}
+
+TYPED_TEST(DecimalZerosTest, LeadingZerosDecimalPoint) {
+ std::string string_value("000.0000");
+ Decimal<TypeParam> d;
+ int precision;
+ int scale;
+ FromString(string_value, &d, &precision, &scale);
+ // We explicitly do not support this for now, otherwise this would be ASSERT_EQ
+ ASSERT_NE(precision, 7);
+
+ ASSERT_EQ(scale, 4);
+ ASSERT_EQ(d.value, 0);
+}
+
+TYPED_TEST(DecimalZerosTest, NoLeadingZerosDecimalPoint) {
+ std::string string_value(".00000");
+ Decimal<TypeParam> d;
+ int precision;
+ int scale;
+ FromString(string_value, &d, &precision, &scale);
+ ASSERT_EQ(precision, 5);
+ ASSERT_EQ(scale, 5);
+ ASSERT_EQ(d.value, 0);
+}
+
} // namespace decimal
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/util/decimal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 3b8a3ff..2fe9da4 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -17,34 +17,77 @@
#include "arrow/util/decimal.h"
-#include <boost/regex.hpp>
-
namespace arrow {
namespace decimal {
-static const boost::regex DECIMAL_PATTERN("(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?");
-
template <typename T>
ARROW_EXPORT Status FromString(
const std::string& s, Decimal<T>* out, int* precision, int* scale) {
+ // Implements this regex: "(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?";
if (s.empty()) {
return Status::Invalid("Empty string cannot be converted to decimal");
}
- boost::smatch match;
- if (!boost::regex_match(s, match, DECIMAL_PATTERN)) {
- std::stringstream ss;
- ss << "String " << s << " is not a valid decimal string";
- return Status::Invalid(ss.str());
+
+ int8_t sign = 1;
+ auto charp = s.cbegin();
+ auto end = s.cend();
+
+ if (*charp == '+' || *charp == '-') {
+ if (*charp == '-') { sign = -1; }
+ ++charp;
}
- const int8_t sign = match[1].str() == "-" ? -1 : 1;
- std::string whole_part = match[4].str();
- std::string fractional_part = match[6].str();
- if (scale != nullptr) { *scale = static_cast<int>(fractional_part.size()); }
+
+ auto numeric_string_start = charp;
+
+ // skip leading zeros
+ while (*charp == '0') {
+ ++charp;
+ }
+
+ // all zeros and no decimal point
+ if (charp == end) {
+ if (out != nullptr) { out->value = static_cast<T>(0); }
+
+ // Not sure what other libraries assign precision to for this case (this case of
+ // a string consisting only of one or more zeros)
+ if (precision != nullptr) {
+ *precision = static_cast<int>(charp - numeric_string_start);
+ }
+
+ if (scale != nullptr) { *scale = 0; }
+
+ return Status::OK();
+ }
+
+ auto whole_part_start = charp;
+ while (isdigit(*charp)) {
+ ++charp;
+ }
+ auto whole_part_end = charp;
+ std::string whole_part(whole_part_start, whole_part_end);
+
+ if (*charp == '.') {
+ ++charp;
+ } else {
+ // no decimal point
+ DCHECK_EQ(charp, end);
+ }
+
+ auto fractional_part_start = charp;
+ while (isdigit(*charp)) {
+ ++charp;
+ }
+ auto fractional_part_end = charp;
+ std::string fractional_part(fractional_part_start, fractional_part_end);
+
if (precision != nullptr) {
- *precision =
- static_cast<int>(whole_part.size()) + static_cast<int>(fractional_part.size());
+ *precision = static_cast<int>(whole_part.size() + fractional_part.size());
}
+
+ if (scale != nullptr) { *scale = static_cast<int>(fractional_part.size()); }
+
if (out != nullptr) { StringToInteger(whole_part, fractional_part, sign, &out->value); }
+
return Status::OK();
}