You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/19 21:12:00 UTC

arrow git commit: ARROW-858: Remove boost_regex from arrow dependencies

Repository: arrow
Updated Branches:
  refs/heads/master 391242a17 -> 74f89cfbe


ARROW-858: Remove boost_regex from arrow dependencies

Author: Phillip Cloud <cp...@gmail.com>

Closes #567 from cpcloud/decimal-no-regex and squashes the following commits:

b5c59bd [Phillip Cloud] ARROW-858: Remove boost_regex from arrow dependencies


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/74f89cfb
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/74f89cfb
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/74f89cfb

Branch: refs/heads/master
Commit: 74f89cfbe0793043eb579ec30b3d6467b0ad9af2
Parents: 391242a
Author: Phillip Cloud <cp...@gmail.com>
Authored: Wed Apr 19 17:11:51 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Apr 19 17:11:51 2017 -0400

----------------------------------------------------------------------
 .travis.yml                              |  1 -
 ci/travis_script_python.sh               |  1 +
 cpp/CMakeLists.txt                       | 22 ++------
 cpp/README.md                            |  1 -
 cpp/src/arrow/ipc/CMakeLists.txt         |  4 +-
 cpp/src/arrow/ipc/ipc-read-write-test.cc |  4 +-
 cpp/src/arrow/python/CMakeLists.txt      |  3 +-
 cpp/src/arrow/util/decimal-test.cc       | 40 +++++++++++++++
 cpp/src/arrow/util/decimal.cc            | 73 +++++++++++++++++++++------
 9 files changed, 108 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 824f62b..6ebebd4 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ addons:
     - valgrind
     - libboost-dev
     - libboost-filesystem-dev
-    - libboost-regex-dev
     - libboost-system-dev
     - libjemalloc-dev
     - gtk-doc-tools

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/ci/travis_script_python.sh
----------------------------------------------------------------------
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index bde1fd7..c1426da 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -22,6 +22,7 @@ pushd $ARROW_PYTHON_DIR
 export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
 
 build_parquet_cpp() {
+  export PARQUET_ARROW_VERSION=$(git rev-parse HEAD)
   conda create -y -q -p $PARQUET_HOME python=3.6
   source activate $PARQUET_HOME
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c1cf785..81e4c90 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -410,19 +410,16 @@ if (ARROW_BOOST_USE_SHARED)
     add_definitions(-DBOOST_ALL_DYN_LINK)
   endif()
 
-  find_package(Boost COMPONENTS system filesystem regex REQUIRED)
+  find_package(Boost COMPONENTS system filesystem REQUIRED)
   if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
     set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
     set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
   else()
     set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
     set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
   endif()
   set(BOOST_SYSTEM_LIBRARY boost_system_shared)
   set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
-  set(BOOST_REGEX_LIBRARY boost_regex_shared)
 else()
   # Find static boost headers and libs
   # TODO Differentiate here between release and debug builds
@@ -431,15 +428,12 @@ else()
   if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
     set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
     set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
-    set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
   else()
     set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
     set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
-    set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
   endif()
   set(BOOST_SYSTEM_LIBRARY boost_system_static)
   set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
-  set(BOOST_REGEX_LIBRARY boost_regex_static)
 endif()
 
 message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
@@ -453,11 +447,7 @@ ADD_THIRDPARTY_LIB(boost_filesystem
     STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
     SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
 
-ADD_THIRDPARTY_LIB(boost_regex
-        STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}"
-        SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}")
-
-SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex)
+SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
 
 include_directories(SYSTEM ${Boost_INCLUDE_DIR})
 
@@ -758,8 +748,7 @@ set(ARROW_MIN_TEST_LIBS
   arrow_static
   gtest
   gtest_main
-  ${ARROW_BASE_LIBS}
-  ${BOOST_REGEX_LIBRARY})
+  ${ARROW_BASE_LIBS})
 
 if (APPLE)
   set(ARROW_MIN_TEST_LIBS
@@ -777,8 +766,7 @@ set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
 set(ARROW_BENCHMARK_LINK_LIBS
   arrow_static
   arrow_benchmark_main
-  ${ARROW_BASE_LIBS}
-  ${BOOST_REGEX_LIBRARY})
+  ${ARROW_BASE_LIBS})
 
 ############################################################
 # "make ctags" target
@@ -875,7 +863,7 @@ endif()
 ############################################################
 
 set(ARROW_LINK_LIBS
-  ${BOOST_REGEX_LIBRARY})
+    )
 
 set(ARROW_STATIC_LINK_LIBS)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/README.md
----------------------------------------------------------------------
diff --git a/cpp/README.md b/cpp/README.md
index 339b6b4..69c6950 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -31,7 +31,6 @@ On Ubuntu/Debian you can install the requirements with:
 sudo apt-get install cmake \
      libboost-dev \
      libboost-filesystem-dev \
-     libboost-regex-dev \
      libboost-system-dev
 ```
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 37b4553..fc1d53e 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -91,14 +91,12 @@ if(MSVC)
   set(UTIL_LINK_LIBS
     arrow_static
     ${BOOST_FILESYSTEM_LIBRARY}
-    ${BOOST_SYSTEM_LIBRARY}
-    ${BOOST_REGEX_LIBRARY})
+    ${BOOST_SYSTEM_LIBRARY})
 else()
   set(UTIL_LINK_LIBS
     arrow_static
     ${BOOST_FILESYSTEM_LIBRARY}
     ${BOOST_SYSTEM_LIBRARY}
-    ${BOOST_REGEX_LIBRARY}
     dl)
 endif()
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/ipc/ipc-read-write-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc
index b39136e..cd793e0 100644
--- a/cpp/src/arrow/ipc/ipc-read-write-test.cc
+++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc
@@ -322,8 +322,8 @@ TEST_F(TestWriteRecordBatch, SliceTruncatesBuffers) {
   std::vector<int32_t> type_ids(a0->length());
   std::shared_ptr<Buffer> ids_buffer;
   ASSERT_OK(test::CopyBufferFromVector(type_ids, &ids_buffer));
-  a1 = std::make_shared<UnionArray>(
-      union_type, a0->length(), struct_children, ids_buffer);
+  a1 =
+      std::make_shared<UnionArray>(union_type, a0->length(), struct_children, ids_buffer);
   CheckArray(a1);
 
   // Dense union

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index 5c2b588..c5cbc50 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -35,8 +35,7 @@ endif()
 set(ARROW_PYTHON_MIN_TEST_LIBS
   arrow_python_test_main
   arrow_python_static
-  arrow_static
-  ${BOOST_REGEX_LIBRARY})
+  arrow_static)
 
 set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS})
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/util/decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
index dcaa9af..5d95c2c 100644
--- a/cpp/src/arrow/util/decimal-test.cc
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -159,5 +159,45 @@ TEST(DecimalTest, TestDecimal128StringAndBytesRoundTrip) {
 
   ASSERT_EQ(expected.value, result.value);
 }
+
+template <typename T>
+class DecimalZerosTest : public ::testing::Test {};
+TYPED_TEST_CASE(DecimalZerosTest, DecimalTypes);
+
+TYPED_TEST(DecimalZerosTest, LeadingZerosNoDecimalPoint) {
+  std::string string_value("0000000");
+  Decimal<TypeParam> d;
+  int precision;
+  int scale;
+  FromString(string_value, &d, &precision, &scale);
+  ASSERT_EQ(precision, 7);
+  ASSERT_EQ(scale, 0);
+  ASSERT_EQ(d.value, 0);
+}
+
+TYPED_TEST(DecimalZerosTest, LeadingZerosDecimalPoint) {
+  std::string string_value("000.0000");
+  Decimal<TypeParam> d;
+  int precision;
+  int scale;
+  FromString(string_value, &d, &precision, &scale);
+  // We explicitly do not support this for now, otherwise this would be ASSERT_EQ
+  ASSERT_NE(precision, 7);
+
+  ASSERT_EQ(scale, 4);
+  ASSERT_EQ(d.value, 0);
+}
+
+TYPED_TEST(DecimalZerosTest, NoLeadingZerosDecimalPoint) {
+  std::string string_value(".00000");
+  Decimal<TypeParam> d;
+  int precision;
+  int scale;
+  FromString(string_value, &d, &precision, &scale);
+  ASSERT_EQ(precision, 5);
+  ASSERT_EQ(scale, 5);
+  ASSERT_EQ(d.value, 0);
+}
+
 }  // namespace decimal
 }  // namespace arrow

http://git-wip-us.apache.org/repos/asf/arrow/blob/74f89cfb/cpp/src/arrow/util/decimal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
index 3b8a3ff..2fe9da4 100644
--- a/cpp/src/arrow/util/decimal.cc
+++ b/cpp/src/arrow/util/decimal.cc
@@ -17,34 +17,77 @@
 
 #include "arrow/util/decimal.h"
 
-#include <boost/regex.hpp>
-
 namespace arrow {
 namespace decimal {
 
-static const boost::regex DECIMAL_PATTERN("(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?");
-
 template <typename T>
 ARROW_EXPORT Status FromString(
     const std::string& s, Decimal<T>* out, int* precision, int* scale) {
+  // Implements this regex: "(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?";
   if (s.empty()) {
     return Status::Invalid("Empty string cannot be converted to decimal");
   }
-  boost::smatch match;
-  if (!boost::regex_match(s, match, DECIMAL_PATTERN)) {
-    std::stringstream ss;
-    ss << "String " << s << " is not a valid decimal string";
-    return Status::Invalid(ss.str());
+
+  int8_t sign = 1;
+  auto charp = s.cbegin();
+  auto end = s.cend();
+
+  if (*charp == '+' || *charp == '-') {
+    if (*charp == '-') { sign = -1; }
+    ++charp;
   }
-  const int8_t sign = match[1].str() == "-" ? -1 : 1;
-  std::string whole_part = match[4].str();
-  std::string fractional_part = match[6].str();
-  if (scale != nullptr) { *scale = static_cast<int>(fractional_part.size()); }
+
+  auto numeric_string_start = charp;
+
+  // skip leading zeros
+  while (*charp == '0') {
+    ++charp;
+  }
+
+  // all zeros and no decimal point
+  if (charp == end) {
+    if (out != nullptr) { out->value = static_cast<T>(0); }
+
+    // It is unclear what precision other libraries assign in this case (a string
+    // consisting only of one or more zeros); here the number of zeros is used
+    if (precision != nullptr) {
+      *precision = static_cast<int>(charp - numeric_string_start);
+    }
+
+    if (scale != nullptr) { *scale = 0; }
+
+    return Status::OK();
+  }
+
+  auto whole_part_start = charp;
+  while (isdigit(*charp)) {
+    ++charp;
+  }
+  auto whole_part_end = charp;
+  std::string whole_part(whole_part_start, whole_part_end);
+
+  if (*charp == '.') {
+    ++charp;
+  } else {
+    // no decimal point
+    DCHECK_EQ(charp, end);
+  }
+
+  auto fractional_part_start = charp;
+  while (isdigit(*charp)) {
+    ++charp;
+  }
+  auto fractional_part_end = charp;
+  std::string fractional_part(fractional_part_start, fractional_part_end);
+
   if (precision != nullptr) {
-    *precision =
-        static_cast<int>(whole_part.size()) + static_cast<int>(fractional_part.size());
+    *precision = static_cast<int>(whole_part.size() + fractional_part.size());
   }
+
+  if (scale != nullptr) { *scale = static_cast<int>(fractional_part.size()); }
+
   if (out != nullptr) { StringToInteger(whole_part, fractional_part, sign, &out->value); }
+
   return Status::OK();
 }