You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/04/09 19:19:59 UTC
[1/2] arrow git commit: ARROW-655: [C++/Python] Implement DecimalArray
Repository: arrow
Updated Branches:
refs/heads/master 449f99162 -> 754bcce68
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index 253be45..4b931bf 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -29,6 +29,7 @@ from pyarrow.array cimport Array
from pyarrow.error cimport check_status
from pyarrow.includes.libarrow cimport (CDataType, CStructType, CListType,
CFixedSizeBinaryType,
+ CDecimalType,
TimeUnit_SECOND, TimeUnit_MILLI,
TimeUnit_MICRO, TimeUnit_NANO,
Type, TimeUnit)
@@ -45,7 +46,7 @@ cdef class DataType:
def __cinit__(self):
pass
- cdef init(self, const shared_ptr[CDataType]& type):
+ cdef void init(self, const shared_ptr[CDataType]& type):
self.sp_type = type
self.type = type.get()
@@ -66,14 +67,14 @@ cdef class DataType:
cdef class DictionaryType(DataType):
- cdef init(self, const shared_ptr[CDataType]& type):
+ cdef void init(self, const shared_ptr[CDataType]& type):
DataType.init(self, type)
self.dict_type = <const CDictionaryType*> type.get()
cdef class TimestampType(DataType):
- cdef init(self, const shared_ptr[CDataType]& type):
+ cdef void init(self, const shared_ptr[CDataType]& type):
DataType.init(self, type)
self.ts_type = <const CTimestampType*> type.get()
@@ -93,7 +94,7 @@ cdef class TimestampType(DataType):
cdef class FixedSizeBinaryType(DataType):
- cdef init(self, const shared_ptr[CDataType]& type):
+ cdef void init(self, const shared_ptr[CDataType]& type):
DataType.init(self, type)
self.fixed_size_binary_type = <const CFixedSizeBinaryType*> type.get()
@@ -103,6 +104,13 @@ cdef class FixedSizeBinaryType(DataType):
return self.fixed_size_binary_type.byte_width()
+cdef class DecimalType(FixedSizeBinaryType):
+
+ cdef void init(self, const shared_ptr[CDataType]& type):
+ DataType.init(self, type)
+ self.decimal_type = <const CDecimalType*> type.get()
+
+
cdef class Field:
def __cinit__(self):
@@ -354,6 +362,12 @@ def float64():
return primitive_type(la.Type_DOUBLE)
+cpdef DataType decimal(int precision, int scale=0):
+ cdef shared_ptr[CDataType] decimal_type
+ decimal_type.reset(new CDecimalType(precision, scale))
+ return box_data_type(decimal_type)
+
+
def string():
"""
UTF8 string
@@ -374,11 +388,9 @@ def binary(int length=-1):
if length == -1:
return primitive_type(la.Type_BINARY)
- cdef FixedSizeBinaryType out = FixedSizeBinaryType()
cdef shared_ptr[CDataType] fixed_size_binary_type
fixed_size_binary_type.reset(new CFixedSizeBinaryType(length))
- out.init(fixed_size_binary_type)
- return out
+ return box_data_type(fixed_size_binary_type)
def list_(DataType value_type):
@@ -436,6 +448,8 @@ cdef DataType box_data_type(const shared_ptr[CDataType]& type):
out = TimestampType()
elif type.get().type == la.Type_FIXED_SIZE_BINARY:
out = FixedSizeBinaryType()
+ elif type.get().type == la.Type_DECIMAL:
+ out = DecimalType()
else:
out = DataType()
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index e2b03d8..d89a8e0 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -20,6 +20,7 @@ from pyarrow.compat import unittest, u # noqa
import pyarrow as pa
import datetime
+import decimal
class TestConvertList(unittest.TestCase):
@@ -162,3 +163,42 @@ class TestConvertList(unittest.TestCase):
data = ['a', 1, 2.0]
with self.assertRaises(pa.ArrowException):
pa.from_pylist(data)
+
+ def test_decimal(self):
+ data = [decimal.Decimal('1234.183'), decimal.Decimal('8094.234')]
+ type = pa.decimal(precision=7, scale=3)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
+
+ def test_decimal_different_precisions(self):
+ data = [
+ decimal.Decimal('1234234983.183'), decimal.Decimal('80943244.234')
+ ]
+ type = pa.decimal(precision=13, scale=3)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
+
+ def test_decimal_no_scale(self):
+ data = [decimal.Decimal('1234234983'), decimal.Decimal('8094324')]
+ type = pa.decimal(precision=10)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
+
+ def test_decimal_negative(self):
+ data = [decimal.Decimal('-1234.234983'), decimal.Decimal('-8.094324')]
+ type = pa.decimal(precision=10, scale=6)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
+
+ def test_decimal_no_whole_part(self):
+ data = [decimal.Decimal('-.4234983'), decimal.Decimal('.0103943')]
+ type = pa.decimal(precision=7, scale=7)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
+
+ def test_decimal_large_integer(self):
+ data = [decimal.Decimal('-394029506937548693.42983'),
+ decimal.Decimal('32358695912932.01033')]
+ type = pa.decimal(precision=23, scale=5)
+ arr = pa.from_pylist(data, type=type)
+ assert arr.to_pylist() == data
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 87c9c03..0504e1d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -20,6 +20,7 @@ from collections import OrderedDict
import datetime
import unittest
+import decimal
import numpy as np
@@ -451,3 +452,72 @@ class TestPandasConversion(unittest.TestCase):
self._check_pandas_roundtrip(df)
self._check_array_roundtrip(col)
self._check_array_roundtrip(col, mask=strided_mask)
+
+ def test_decimal_32_from_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('-1234.123'),
+ decimal.Decimal('1234.439'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ field = A.Field.from_py('decimals', A.decimal(7, 3))
+ schema = A.Schema.from_fields([field])
+ assert converted.schema.equals(schema)
+
+ def test_decimal_32_to_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('-1234.123'),
+ decimal.Decimal('1234.439'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ df = converted.to_pandas()
+ tm.assert_frame_equal(df, expected)
+
+ def test_decimal_64_from_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('-129934.123331'),
+ decimal.Decimal('129534.123731'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ field = A.Field.from_py('decimals', A.decimal(12, 6))
+ schema = A.Schema.from_fields([field])
+ assert converted.schema.equals(schema)
+
+ def test_decimal_64_to_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('-129934.123331'),
+ decimal.Decimal('129534.123731'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ df = converted.to_pandas()
+ tm.assert_frame_equal(df, expected)
+
+ def test_decimal_128_from_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('394092382910493.12341234678'),
+ -decimal.Decimal('314292388910493.12343437128'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ field = A.Field.from_py('decimals', A.decimal(26, 11))
+ schema = A.Schema.from_fields([field])
+ assert converted.schema.equals(schema)
+
+ def test_decimal_128_to_pandas(self):
+ expected = pd.DataFrame({
+ 'decimals': [
+ decimal.Decimal('394092382910493.12341234678'),
+ -decimal.Decimal('314292388910493.12343437128'),
+ ]
+ })
+ converted = A.Table.from_pandas(expected)
+ df = converted.to_pandas()
+ tm.assert_frame_equal(df, expected)
[2/2] arrow git commit: ARROW-655: [C++/Python] Implement DecimalArray
Posted by we...@apache.org.
ARROW-655: [C++/Python] Implement DecimalArray
Adds Decimal support for C++ and Python.
TODOs:
- [x] Tighten up some of the GIL acquisition. E.g., we may not need to hold it when importing the decimal module if we acquire it where we import the decimal module.
- [x] Investigate FreeBSD issue (manifesting on OS X) where typeinfo symbols for `__int128_t` are not exported: https://bugs.llvm.org/show_bug.cgi?id=26156.
- [x] See if there's a better way to visit scalar decimals, rather than keeping extra state on the class. Seems like an unacceptable hack.
Author: Phillip Cloud <cp...@gmail.com>
Closes #403 from cpcloud/decimal and squashes the following commits:
e5470fd [Phillip Cloud] Remove unnecessary header in helpers.h
07713a7 [Phillip Cloud] Remove more boost leakage
f764156 [Phillip Cloud] Revert "Transitively link static libs as well"
a7109b2 [Phillip Cloud] Transitively link static libs as well
bf2a7ea [Phillip Cloud] Move IsNegative to cc file
cb2c1ac [Phillip Cloud] Do not link boost regex to jemalloc
e63b766 [Phillip Cloud] Remove python extra cmake args
805bbac [Phillip Cloud] ARROW-655: [C++/Python] Implement DecimalArray
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/754bcce6
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/754bcce6
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/754bcce6
Branch: refs/heads/master
Commit: 754bcce686ecf02e123dcf4801715bf155f15e1f
Parents: 449f991
Author: Phillip Cloud <cp...@gmail.com>
Authored: Sun Apr 9 15:19:53 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Sun Apr 9 15:19:53 2017 -0400
----------------------------------------------------------------------
.travis.yml | 1 +
cpp/CMakeLists.txt | 27 ++-
cpp/cmake_modules/FindPythonLibsNew.cmake | 3 +-
cpp/src/arrow/array-decimal-test.cc | 194 +++++++++++++++++++++-
cpp/src/arrow/array.cc | 49 +++++-
cpp/src/arrow/array.h | 31 +++-
cpp/src/arrow/builder.cc | 88 +++++++++-
cpp/src/arrow/builder.h | 29 +++-
cpp/src/arrow/compare.cc | 40 ++++-
cpp/src/arrow/ipc/CMakeLists.txt | 7 +-
cpp/src/arrow/python/CMakeLists.txt | 3 +-
cpp/src/arrow/python/builtin_convert.cc | 62 ++++++-
cpp/src/arrow/python/builtin_convert.h | 2 +-
cpp/src/arrow/python/common.h | 9 +-
cpp/src/arrow/python/helpers.cc | 79 +++++++++
cpp/src/arrow/python/helpers.h | 26 ++-
cpp/src/arrow/python/pandas_convert.cc | 176 +++++++++++++++++++-
cpp/src/arrow/python/python-test.cc | 33 ++++
cpp/src/arrow/type.cc | 18 +-
cpp/src/arrow/type.h | 26 ++-
cpp/src/arrow/type_fwd.h | 2 +
cpp/src/arrow/type_traits.h | 13 +-
cpp/src/arrow/util/CMakeLists.txt | 2 +
cpp/src/arrow/util/bit-util.h | 1 -
cpp/src/arrow/util/decimal-test.cc | 161 ++++++++++++++++++
cpp/src/arrow/util/decimal.cc | 141 ++++++++++++++++
cpp/src/arrow/util/decimal.h | 144 ++++++++++++++++
cpp/src/arrow/visitor_inline.h | 2 +-
format/Schema.fbs | 2 +
python/pyarrow/__init__.py | 2 +-
python/pyarrow/array.pxd | 4 +
python/pyarrow/array.pyx | 5 +
python/pyarrow/includes/common.pxd | 5 +
python/pyarrow/includes/libarrow.pxd | 16 ++
python/pyarrow/scalar.pxd | 1 +
python/pyarrow/scalar.pyx | 25 ++-
python/pyarrow/schema.pxd | 10 +-
python/pyarrow/schema.pyx | 28 +++-
python/pyarrow/tests/test_convert_builtin.py | 40 +++++
python/pyarrow/tests/test_convert_pandas.py | 70 ++++++++
40 files changed, 1497 insertions(+), 80 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index b219b03..f74a3b2 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,6 +14,7 @@ addons:
- valgrind
- libboost-dev
- libboost-filesystem-dev
+ - libboost-regex-dev
- libboost-system-dev
- libjemalloc-dev
- gtk-doc-tools
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9947a34..5852fe5 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -398,30 +398,36 @@ if (ARROW_BOOST_USE_SHARED)
add_definitions(-DBOOST_ALL_DYN_LINK)
endif()
- find_package(Boost COMPONENTS system filesystem REQUIRED)
+ find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+ set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_SHARED_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_SHARED_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+ set(BOOST_SHARED_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_shared)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_shared)
+ set(BOOST_REGEX_LIBRARY boost_regex_shared)
else()
# Find static boost headers and libs
# TODO Differentiate here between release and debug builds
set(Boost_USE_STATIC_LIBS ON)
- find_package(Boost COMPONENTS system filesystem REQUIRED)
+ find_package(Boost COMPONENTS system filesystem regex REQUIRED)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_DEBUG})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_DEBUG})
+ set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_DEBUG})
else()
set(BOOST_STATIC_SYSTEM_LIBRARY ${Boost_SYSTEM_LIBRARY_RELEASE})
set(BOOST_STATIC_FILESYSTEM_LIBRARY ${Boost_FILESYSTEM_LIBRARY_RELEASE})
+ set(BOOST_STATIC_REGEX_LIBRARY ${Boost_REGEX_LIBRARY_RELEASE})
endif()
set(BOOST_SYSTEM_LIBRARY boost_system_static)
set(BOOST_FILESYSTEM_LIBRARY boost_filesystem_static)
+ set(BOOST_REGEX_LIBRARY boost_regex_static)
endif()
message(STATUS "Boost include dir: " ${Boost_INCLUDE_DIRS})
@@ -435,7 +441,11 @@ ADD_THIRDPARTY_LIB(boost_filesystem
STATIC_LIB "${BOOST_STATIC_FILESYSTEM_LIBRARY}"
SHARED_LIB "${BOOST_SHARED_FILESYSTEM_LIBRARY}")
-SET(ARROW_BOOST_LIBS boost_system boost_filesystem)
+ADD_THIRDPARTY_LIB(boost_regex
+ STATIC_LIB "${BOOST_STATIC_REGEX_LIBRARY}"
+ SHARED_LIB "${BOOST_SHARED_REGEX_LIBRARY}")
+
+SET(ARROW_BOOST_LIBS boost_system boost_filesystem boost_regex)
include_directories(SYSTEM ${Boost_INCLUDE_DIR})
@@ -695,14 +705,16 @@ endif()
set(ARROW_MIN_TEST_LIBS
arrow_static
arrow_test_main
- ${ARROW_BASE_LIBS})
+ ${ARROW_BASE_LIBS}
+ ${BOOST_REGEX_LIBRARY})
set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
set(ARROW_BENCHMARK_LINK_LIBS
arrow_static
arrow_benchmark_main
- ${ARROW_BASE_LIBS})
+ ${ARROW_BASE_LIBS}
+ ${BOOST_REGEX_LIBRARY})
############################################################
# "make ctags" target
@@ -796,7 +808,7 @@ endif()
############################################################
set(ARROW_LINK_LIBS
-)
+ ${BOOST_REGEX_LIBRARY})
set(ARROW_PRIVATE_LINK_LIBS
)
@@ -816,6 +828,7 @@ set(ARROW_SRCS
src/arrow/visitor.cc
src/arrow/util/bit-util.cc
+ src/arrow/util/decimal.cc
)
if(NOT APPLE AND NOT MSVC)
@@ -825,9 +838,11 @@ if(NOT APPLE AND NOT MSVC)
set(ARROW_SHARED_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map")
endif()
+
ADD_ARROW_LIB(arrow
SOURCES ${ARROW_SRCS}
SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS}
+ SHARED_LINK_LIBS ${ARROW_LINK_LIBS}
)
add_subdirectory(src/arrow)
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/cmake_modules/FindPythonLibsNew.cmake
----------------------------------------------------------------------
diff --git a/cpp/cmake_modules/FindPythonLibsNew.cmake b/cpp/cmake_modules/FindPythonLibsNew.cmake
index dfe5661..d9cc4b3 100644
--- a/cpp/cmake_modules/FindPythonLibsNew.cmake
+++ b/cpp/cmake_modules/FindPythonLibsNew.cmake
@@ -175,7 +175,8 @@ else()
find_library(PYTHON_LIBRARY
NAMES "python${PYTHON_LIBRARY_SUFFIX}"
PATHS ${_PYTHON_LIBS_SEARCH}
- NO_SYSTEM_ENVIRONMENT_PATH)
+ NO_SYSTEM_ENVIRONMENT_PATH
+ NO_CMAKE_SYSTEM_PATH)
message(STATUS "Found Python lib ${PYTHON_LIBRARY}")
endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/array-decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array-decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc
index b64023b..4c01f92 100644
--- a/cpp/src/arrow/array-decimal-test.cc
+++ b/cpp/src/arrow/array-decimal-test.cc
@@ -15,13 +15,16 @@
// specific language governing permissions and limitations
// under the License.
+#include "arrow/type.h"
#include "gtest/gtest.h"
-#include "arrow/type.h"
+#include "arrow/builder.h"
+#include "arrow/test-util.h"
+#include "arrow/util/decimal.h"
namespace arrow {
-TEST(TypesTest, TestDecimalType) {
+TEST(TypesTest, TestDecimal32Type) {
DecimalType t1(8, 4);
ASSERT_EQ(t1.type, Type::DECIMAL);
@@ -29,6 +32,193 @@ TEST(TypesTest, TestDecimalType) {
ASSERT_EQ(t1.scale, 4);
ASSERT_EQ(t1.ToString(), std::string("decimal(8, 4)"));
+
+ // Test properties
+ ASSERT_EQ(t1.byte_width(), 4);
+ ASSERT_EQ(t1.bit_width(), 32);
}
+TEST(TypesTest, TestDecimal64Type) {
+ DecimalType t1(12, 5);
+
+ ASSERT_EQ(t1.type, Type::DECIMAL);
+ ASSERT_EQ(t1.precision, 12);
+ ASSERT_EQ(t1.scale, 5);
+
+ ASSERT_EQ(t1.ToString(), std::string("decimal(12, 5)"));
+
+ // Test properties
+ ASSERT_EQ(t1.byte_width(), 8);
+ ASSERT_EQ(t1.bit_width(), 64);
+}
+
+TEST(TypesTest, TestDecimal128Type) {
+ DecimalType t1(27, 7);
+
+ ASSERT_EQ(t1.type, Type::DECIMAL);
+ ASSERT_EQ(t1.precision, 27);
+ ASSERT_EQ(t1.scale, 7);
+
+ ASSERT_EQ(t1.ToString(), std::string("decimal(27, 7)"));
+
+ // Test properties
+ ASSERT_EQ(t1.byte_width(), 16);
+ ASSERT_EQ(t1.bit_width(), 128);
+}
+
+template <typename T>
+class DecimalTestBase {
+ public:
+ virtual std::vector<uint8_t> data(
+ const std::vector<T>& input, size_t byte_width) const = 0;
+
+ void test(int precision, const std::vector<T>& draw,
+ const std::vector<uint8_t>& valid_bytes,
+ const std::vector<uint8_t>& sign_bitmap = {}, int64_t offset = 0) const {
+ auto type = std::make_shared<DecimalType>(precision, 4);
+ int byte_width = type->byte_width();
+ auto pool = default_memory_pool();
+ auto builder = std::make_shared<DecimalBuilder>(pool, type);
+ size_t null_count = 0;
+
+ size_t size = draw.size();
+ builder->Reserve(size);
+
+ for (size_t i = 0; i < size; ++i) {
+ if (valid_bytes[i]) {
+ builder->Append(draw[i]);
+ } else {
+ builder->AppendNull();
+ ++null_count;
+ }
+ }
+
+ std::shared_ptr<Buffer> expected_sign_bitmap;
+ if (!sign_bitmap.empty()) {
+ BitUtil::BytesToBits(sign_bitmap, &expected_sign_bitmap);
+ }
+
+ auto raw_bytes = data(draw, byte_width);
+ auto expected_data = std::make_shared<Buffer>(raw_bytes.data(), size * byte_width);
+ auto expected_null_bitmap = test::bytes_to_null_buffer(valid_bytes);
+ int64_t expected_null_count = test::null_count(valid_bytes);
+ auto expected = std::make_shared<DecimalArray>(type, size, expected_data,
+ expected_null_bitmap, expected_null_count, offset, expected_sign_bitmap);
+
+ std::shared_ptr<Array> out;
+ ASSERT_OK(builder->Finish(&out));
+ ASSERT_TRUE(out->Equals(*expected));
+ }
+};
+
+template <typename T>
+class DecimalTest : public DecimalTestBase<T> {
+ public:
+ std::vector<uint8_t> data(
+ const std::vector<T>& input, size_t byte_width) const override {
+ std::vector<uint8_t> result;
+ result.reserve(input.size() * byte_width);
+ // TODO(phillipc): There's probably a better way to do this
+ constexpr static const size_t bytes_per_element = sizeof(T);
+ for (size_t i = 0, j = 0; i < input.size(); ++i, j += bytes_per_element) {
+ *reinterpret_cast<typename T::value_type*>(&result[j]) = input[i].value;
+ }
+ return result;
+ }
+};
+
+template <>
+class DecimalTest<Decimal128> : public DecimalTestBase<Decimal128> {
+ public:
+ std::vector<uint8_t> data(
+ const std::vector<Decimal128>& input, size_t byte_width) const override {
+ std::vector<uint8_t> result;
+ result.reserve(input.size() * byte_width);
+ constexpr static const size_t bytes_per_element = 16;
+ for (size_t i = 0; i < input.size(); ++i) {
+ uint8_t stack_bytes[bytes_per_element] = {0};
+ uint8_t* bytes = stack_bytes;
+ bool is_negative;
+ ToBytes(input[i], &bytes, &is_negative);
+
+ for (size_t i = 0; i < bytes_per_element; ++i) {
+ result.push_back(bytes[i]);
+ }
+ }
+ return result;
+ }
+};
+
+class Decimal32BuilderTest : public ::testing::TestWithParam<int>,
+ public DecimalTest<Decimal32> {};
+
+class Decimal64BuilderTest : public ::testing::TestWithParam<int>,
+ public DecimalTest<Decimal64> {};
+
+class Decimal128BuilderTest : public ::testing::TestWithParam<int>,
+ public DecimalTest<Decimal128> {};
+
+TEST_P(Decimal32BuilderTest, NoNulls) {
+ int precision = GetParam();
+ std::vector<Decimal32> draw = {
+ Decimal32(1), Decimal32(2), Decimal32(2389), Decimal32(4), Decimal32(-12348)};
+ std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
+ this->test(precision, draw, valid_bytes);
+}
+
+TEST_P(Decimal64BuilderTest, NoNulls) {
+ int precision = GetParam();
+ std::vector<Decimal64> draw = {
+ Decimal64(1), Decimal64(2), Decimal64(2389), Decimal64(4), Decimal64(-12348)};
+ std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
+ this->test(precision, draw, valid_bytes);
+}
+
+TEST_P(Decimal128BuilderTest, NoNulls) {
+ int precision = GetParam();
+ std::vector<Decimal128> draw = {
+ Decimal128(1), Decimal128(-2), Decimal128(2389), Decimal128(4), Decimal128(-12348)};
+ std::vector<uint8_t> valid_bytes = {true, true, true, true, true};
+ std::vector<uint8_t> sign_bitmap = {false, true, false, false, true};
+ this->test(precision, draw, valid_bytes, sign_bitmap);
+}
+
+TEST_P(Decimal32BuilderTest, WithNulls) {
+ int precision = GetParam();
+ std::vector<Decimal32> draw = {
+ Decimal32(1), Decimal32(2), Decimal32(-1), Decimal32(4), Decimal32(-1)};
+ std::vector<uint8_t> valid_bytes = {true, true, false, true, false};
+ this->test(precision, draw, valid_bytes);
+}
+
+TEST_P(Decimal64BuilderTest, WithNulls) {
+ int precision = GetParam();
+ std::vector<Decimal64> draw = {
+ Decimal64(-1), Decimal64(2), Decimal64(-1), Decimal64(4), Decimal64(-1)};
+ std::vector<uint8_t> valid_bytes = {true, true, false, true, false};
+ this->test(precision, draw, valid_bytes);
+}
+
+TEST_P(Decimal128BuilderTest, WithNulls) {
+ int precision = GetParam();
+ std::vector<Decimal128> draw = {Decimal128(1), Decimal128(2), Decimal128(-1),
+ Decimal128(4), Decimal128(-1), Decimal128(1), Decimal128(2),
+ Decimal128("230342903942.234234"), Decimal128("-23049302932.235234")};
+ std::vector<uint8_t> valid_bytes = {
+ true, true, false, true, false, true, true, true, true};
+ std::vector<uint8_t> sign_bitmap = {
+ false, false, false, false, false, false, false, false, true};
+ this->test(precision, draw, valid_bytes, sign_bitmap);
+}
+
+INSTANTIATE_TEST_CASE_P(Decimal32BuilderTest, Decimal32BuilderTest,
+ ::testing::Range(
+ DecimalPrecision<int32_t>::minimum, DecimalPrecision<int32_t>::maximum));
+INSTANTIATE_TEST_CASE_P(Decimal64BuilderTest, Decimal64BuilderTest,
+ ::testing::Range(
+ DecimalPrecision<int64_t>::minimum, DecimalPrecision<int64_t>::maximum));
+INSTANTIATE_TEST_CASE_P(Decimal128BuilderTest, Decimal128BuilderTest,
+ ::testing::Range(
+ DecimalPrecision<int128_t>::minimum, DecimalPrecision<int128_t>::maximum));
+
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index bd20654..4e73e71 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -27,6 +27,7 @@
#include "arrow/status.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/visitor.h"
#include "arrow/visitor_inline.h"
@@ -283,10 +284,8 @@ std::shared_ptr<Array> StringArray::Slice(int64_t offset, int64_t length) const
FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type,
int64_t length, const std::shared_ptr<Buffer>& data,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count, int64_t offset)
- : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {
- DCHECK(type->type == Type::FIXED_SIZE_BINARY);
- byte_width_ = static_cast<const FixedSizeBinaryType&>(*type).byte_width();
-}
+ : PrimitiveArray(type, length, data, null_bitmap, null_count, offset),
+ byte_width_(static_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}
std::shared_ptr<Array> FixedSizeBinaryArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(offset_, length_, &offset, &length);
@@ -294,6 +293,48 @@ std::shared_ptr<Array> FixedSizeBinaryArray::Slice(int64_t offset, int64_t lengt
type_, length, data_, null_bitmap_, kUnknownNullCount, offset);
}
+const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const {
+ return raw_data_ + (i + offset_) * byte_width_;
+}
+
+// ----------------------------------------------------------------------
+// Decimal
+DecimalArray::DecimalArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data, const std::shared_ptr<Buffer>& null_bitmap,
+ int64_t null_count, int64_t offset, const std::shared_ptr<Buffer>& sign_bitmap)
+ : FixedSizeBinaryArray(type, length, data, null_bitmap, null_count, offset),
+ sign_bitmap_(sign_bitmap),
+ sign_bitmap_data_(sign_bitmap != nullptr ? sign_bitmap->data() : nullptr) {}
+
+bool DecimalArray::IsNegative(int64_t i) const {
+ return sign_bitmap_data_ != nullptr ? BitUtil::GetBit(sign_bitmap_data_, i) : false;
+}
+
+template <typename T>
+ARROW_EXPORT Decimal<T> DecimalArray::Value(int64_t i) const {
+ Decimal<T> result;
+ FromBytes(GetValue(i), &result);
+ return result;
+}
+
+template ARROW_EXPORT Decimal32 DecimalArray::Value(int64_t i) const;
+template ARROW_EXPORT Decimal64 DecimalArray::Value(int64_t i) const;
+
+template <>
+ARROW_EXPORT Decimal128 DecimalArray::Value(int64_t i) const {
+ Decimal128 result;
+ FromBytes(GetValue(i), IsNegative(i), &result);
+ return result;
+}
+
+template ARROW_EXPORT Decimal128 DecimalArray::Value(int64_t i) const;
+
+std::shared_ptr<Array> DecimalArray::Slice(int64_t offset, int64_t length) const {
+ ConformSliceParams(offset_, length_, &offset, &length);
+ return std::make_shared<DecimalArray>(
+ type_, length, data_, null_bitmap_, kUnknownNullCount, offset, sign_bitmap_);
+}
+
// ----------------------------------------------------------------------
// Struct
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 9f0e739..a4117fa 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -39,6 +39,9 @@ class MemoryPool;
class MutableBuffer;
class Status;
+template <typename T>
+struct Decimal;
+
/// Immutable data array with some logical type and some length.
///
/// Any memory is owned by the respective Buffer instance (or its parents).
@@ -356,9 +359,7 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
const std::shared_ptr<Buffer>& null_bitmap = nullptr, int64_t null_count = 0,
int64_t offset = 0);
- const uint8_t* GetValue(int64_t i) const {
- return raw_data_ + (i + offset_) * byte_width_;
- }
+ const uint8_t* GetValue(int64_t i) const;
int32_t byte_width() const { return byte_width_; }
@@ -371,6 +372,30 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
};
// ----------------------------------------------------------------------
+// DecimalArray
+class ARROW_EXPORT DecimalArray : public FixedSizeBinaryArray {
+ public:
+ using TypeClass = Type;
+
+ DecimalArray(const std::shared_ptr<DataType>& type, int64_t length,
+ const std::shared_ptr<Buffer>& data,
+ const std::shared_ptr<Buffer>& null_bitmap = nullptr, int64_t null_count = 0,
+ int64_t offset = 0, const std::shared_ptr<Buffer>& sign_bitmap = nullptr);
+
+ bool IsNegative(int64_t i) const;
+
+ template <typename T>
+ ARROW_EXPORT Decimal<T> Value(int64_t i) const;
+
+ std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const override;
+
+ private:
+ /// Only needed for 128 bit Decimals
+ std::shared_ptr<Buffer> sign_bitmap_;
+ const uint8_t* sign_bitmap_data_;
+};
+
+// ----------------------------------------------------------------------
// Struct
class ARROW_EXPORT StructArray : public Array {
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 40b81cf..a3677ef 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -27,6 +27,7 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
namespace arrow {
@@ -324,6 +325,85 @@ Status BooleanBuilder::Append(
}
// ----------------------------------------------------------------------
+// DecimalBuilder
+DecimalBuilder::DecimalBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type)
+ : FixedSizeBinaryBuilder(pool, type),
+ sign_bitmap_(nullptr),
+ sign_bitmap_data_(nullptr) {}
+
+template <typename T>
+ARROW_EXPORT Status DecimalBuilder::Append(const Decimal<T>& val) {
+ DCHECK_EQ(sign_bitmap_, nullptr) << "sign_bitmap_ is not null";
+ DCHECK_EQ(sign_bitmap_data_, nullptr) << "sign_bitmap_data_ is not null";
+
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
+ return FixedSizeBinaryBuilder::Append(reinterpret_cast<const uint8_t*>(&val.value));
+}
+
+template ARROW_EXPORT Status DecimalBuilder::Append(const Decimal32& val);
+template ARROW_EXPORT Status DecimalBuilder::Append(const Decimal64& val);
+
+template <>
+ARROW_EXPORT Status DecimalBuilder::Append(const Decimal128& value) {
+ DCHECK_NE(sign_bitmap_, nullptr) << "sign_bitmap_ is null";
+ DCHECK_NE(sign_bitmap_data_, nullptr) << "sign_bitmap_data_ is null";
+
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Reserve(1));
+ uint8_t stack_bytes[16] = {0};
+ uint8_t* bytes = stack_bytes;
+ bool is_negative;
+ ToBytes(value, &bytes, &is_negative);
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Append(bytes));
+
+ // TODO(phillipc): calculate the proper storage size here (do we have a function to do
+ // this)?
+ // TODO(phillipc): Reserve number of elements
+ RETURN_NOT_OK(sign_bitmap_->Reserve(1));
+ BitUtil::SetBitTo(sign_bitmap_data_, length_ - 1, is_negative);
+ return Status::OK();
+}
+
+template ARROW_EXPORT Status DecimalBuilder::Append(const Decimal128& val);
+
+Status DecimalBuilder::Init(int64_t capacity) {
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Init(capacity));
+ if (byte_width_ == 16) {
+ AllocateResizableBuffer(pool_, null_bitmap_->size(), &sign_bitmap_);
+ sign_bitmap_data_ = sign_bitmap_->mutable_data();
+ memset(sign_bitmap_data_, 0, static_cast<size_t>(sign_bitmap_->capacity()));
+ }
+ return Status::OK();
+}
+
+Status DecimalBuilder::Resize(int64_t capacity) {
+ int64_t old_bytes = null_bitmap_ != nullptr ? null_bitmap_->size() : 0;
+ if (sign_bitmap_ == nullptr) { return Init(capacity); }
+ RETURN_NOT_OK(FixedSizeBinaryBuilder::Resize(capacity));
+
+ if (byte_width_ == 16) {
+ RETURN_NOT_OK(sign_bitmap_->Resize(null_bitmap_->size()));
+ int64_t new_bytes = sign_bitmap_->size();
+ sign_bitmap_data_ = sign_bitmap_->mutable_data();
+
+ // The buffer might be overpadded to deal with padding according to the spec
+ if (old_bytes < new_bytes) {
+ memset(sign_bitmap_data_ + old_bytes, 0,
+ static_cast<size_t>(sign_bitmap_->capacity() - old_bytes));
+ }
+ }
+ return Status::OK();
+}
+
+Status DecimalBuilder::Finish(std::shared_ptr<Array>* out) {
+ std::shared_ptr<Buffer> data = byte_builder_.Finish();
+
+ /// TODO(phillipc): not sure where to get the offset argument here
+ *out = std::make_shared<DecimalArray>(
+ type_, length_, data, null_bitmap_, null_count_, 0, sign_bitmap_);
+ return Status::OK();
+}
+
+// ----------------------------------------------------------------------
// ListBuilder
ListBuilder::ListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> value_builder,
@@ -440,10 +520,9 @@ Status StringBuilder::Finish(std::shared_ptr<Array>* out) {
FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(
MemoryPool* pool, const std::shared_ptr<DataType>& type)
- : ArrayBuilder(pool, type), byte_builder_(pool) {
- DCHECK(type->type == Type::FIXED_SIZE_BINARY);
- byte_width_ = static_cast<const FixedSizeBinaryType&>(*type).byte_width();
-}
+ : ArrayBuilder(pool, type),
+ byte_width_(static_cast<const FixedSizeBinaryType&>(*type).byte_width()),
+ byte_builder_(pool) {}
Status FixedSizeBinaryBuilder::Append(const uint8_t* value) {
RETURN_NOT_OK(Reserve(1));
@@ -543,6 +622,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
BUILDER_CASE(STRING, StringBuilder);
BUILDER_CASE(BINARY, BinaryBuilder);
BUILDER_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryBuilder);
+ BUILDER_CASE(DECIMAL, DecimalBuilder);
case Type::LIST: {
std::shared_ptr<ArrayBuilder> value_builder;
std::shared_ptr<DataType> value_type =
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 60cdc4c..d42ab5b 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -37,6 +37,9 @@ namespace arrow {
class Array;
+template <typename T>
+struct Decimal;
+
static constexpr int64_t kMinBuilderCapacity = 1 << 5;
/// Base class for all data array builders.
@@ -76,12 +79,12 @@ class ARROW_EXPORT ArrayBuilder {
Status SetNotNull(int64_t length);
/// Allocates initial capacity requirements for the builder. In most
- /// cases subclasses should override and call there parent classes
+ /// cases subclasses should override and call their parent class's
/// method as well.
virtual Status Init(int64_t capacity);
/// Resizes the null_bitmap array. In most
- /// cases subclasses should override and call there parent classes
+ /// cases subclasses should override and call their parent class's
/// method as well.
virtual Status Resize(int64_t new_bits);
@@ -275,9 +278,7 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
return Status::OK();
}
- Status Append(uint8_t val) {
- return Append(val != 0);
- }
+ Status Append(uint8_t val) { return Append(val != 0); }
/// Vector append
///
@@ -415,6 +416,24 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
BufferBuilder byte_builder_;
};
+/// Builder for decimal arrays, layered on top of FixedSizeBinaryBuilder.
+class ARROW_EXPORT DecimalBuilder : public FixedSizeBinaryBuilder {
+ public:
+ explicit DecimalBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type);
+
+ /// Append a single decimal value
+ template <typename T>
+ ARROW_EXPORT Status Append(const Decimal<T>& val);
+
+ /// Overrides of the ArrayBuilder allocation, growth and finalization hooks
+ Status Init(int64_t capacity) override;
+ Status Resize(int64_t capacity) override;
+ Status Finish(std::shared_ptr<Array>* out) override;
+
+ private:
+ /// We only need these for 128 bit decimals, because boost stores the sign
+ /// separate from the underlying bytes.
+ std::shared_ptr<ResizableBuffer> sign_bitmap_;
+ /// Raw pointer into sign_bitmap_.
+ /// NOTE(review): no in-class initializer here -- confirm the constructor
+ /// sets it before first use.
+ uint8_t* sign_bitmap_data_;
+};
+
// ----------------------------------------------------------------------
// Struct
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/compare.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 7451439..2297e4b 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -29,6 +29,7 @@
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"
@@ -232,6 +233,41 @@ class RangeEqualsVisitor {
return Status::OK();
}
+ /// Compares the decimal values in [left_start_idx_, left_end_idx_) of `left`
+ /// against the range of right_ starting at right_start_idx_. Sets result_
+ /// and always returns Status::OK().
+ Status Visit(const DecimalArray& left) {
+ const auto& right = static_cast<const DecimalArray&>(right_);
+
+ int32_t width = left.byte_width();
+
+ const uint8_t* left_data = nullptr;
+ const uint8_t* right_data = nullptr;
+
+ if (left.data()) { left_data = left.raw_data() + left.offset() * width; }
+
+ if (right.data()) { right_data = right.raw_data() + right.offset() * width; }
+
+ for (int64_t i = left_start_idx_, o_i = right_start_idx_; i < left_end_idx_;
+ ++i, ++o_i) {
+ // Check validity first: the sign bit and value bytes of a null slot carry
+ // no meaning, so two null slots must compare equal whatever they hold.
+ const bool is_null = left.IsNull(i);
+ if (is_null != right.IsNull(o_i)) {
+ result_ = false;
+ return Status::OK();
+ }
+ if (is_null) continue;
+
+ if (left.IsNegative(i) != right.IsNegative(o_i)) {
+ result_ = false;
+ return Status::OK();
+ }
+
+ if (std::memcmp(left_data + width * i, right_data + width * o_i, width)) {
+ result_ = false;
+ return Status::OK();
+ }
+ }
+ result_ = true;
+ return Status::OK();
+ }
+
Status Visit(const NullArray& left) {
UNUSED(left);
result_ = true;
@@ -244,10 +280,6 @@ class RangeEqualsVisitor {
return CompareValues<T>(left);
}
- Status Visit(const DecimalArray& left) {
- return Status::NotImplemented("Decimal type");
- }
-
Status Visit(const ListArray& left) {
result_ = CompareLists(left);
return Status::OK();
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/ipc/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 57db033..c6880c5 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -27,7 +27,8 @@ set(ARROW_IPC_SHARED_LINK_LIBS
set(ARROW_IPC_TEST_LINK_LIBS
arrow_ipc_static
arrow_io_static
- arrow_static)
+ arrow_static
+ ${BOOST_REGEX_LIBRARY})
set(ARROW_IPC_SRCS
feather.cc
@@ -161,7 +162,8 @@ if(MSVC)
arrow_io_static
arrow_static
${BOOST_FILESYSTEM_LIBRARY}
- ${BOOST_SYSTEM_LIBRARY})
+ ${BOOST_SYSTEM_LIBRARY}
+ ${BOOST_REGEX_LIBRARY})
else()
set(UTIL_LINK_LIBS
arrow_ipc_static
@@ -169,6 +171,7 @@ else()
arrow_static
${BOOST_FILESYSTEM_LIBRARY}
${BOOST_SYSTEM_LIBRARY}
+ ${BOOST_REGEX_LIBRARY}
dl)
endif()
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt
index c69d976..604527f 100644
--- a/cpp/src/arrow/python/CMakeLists.txt
+++ b/cpp/src/arrow/python/CMakeLists.txt
@@ -37,7 +37,8 @@ set(ARROW_PYTHON_MIN_TEST_LIBS
arrow_python_static
arrow_ipc_static
arrow_io_static
- arrow_static)
+ arrow_static
+ ${BOOST_REGEX_LIBRARY})
if(ARROW_BUILD_TESTS)
ADD_THIRDPARTY_LIB(python
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/builtin_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 25b32ee..189ecee 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -17,12 +17,16 @@
#include <Python.h>
#include <datetime.h>
+
+#include <algorithm>
#include <sstream>
+#include <string>
#include "arrow/python/builtin_convert.h"
#include "arrow/api.h"
#include "arrow/status.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/python/helpers.h"
@@ -109,7 +113,6 @@ class ScalarVisitor {
int64_t float_count_;
int64_t binary_count_;
int64_t unicode_count_;
-
// Place to accumulate errors
// std::vector<Status> errors_;
};
@@ -394,8 +397,7 @@ class BytesConverter : public TypedConverter<BinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
- return Status::Invalid(
- "Value that cannot be converted to bytes was encountered");
+ return Status::Invalid("Value that cannot be converted to bytes was encountered");
}
// No error checking
length = PyBytes_GET_SIZE(bytes_obj);
@@ -429,8 +431,7 @@ class FixedWidthBytesConverter : public TypedConverter<FixedSizeBinaryBuilder> {
} else if (PyBytes_Check(item)) {
bytes_obj = item;
} else {
- return Status::Invalid(
- "Value that cannot be converted to bytes was encountered");
+ return Status::Invalid("Value that cannot be converted to bytes was encountered");
}
// No error checking
RETURN_NOT_OK(CheckPythonBytesAreFixedLength(bytes_obj, expected_length));
@@ -495,6 +496,54 @@ class ListConverter : public TypedConverter<ListBuilder> {
std::shared_ptr<SeqConverter> value_converter_;
};
+// Expands to one switch case that converts `item` to an arrow::Decimal of the
+// given bit width and appends it to `builder`, returning early on failure.
+#define DECIMAL_CONVERT_CASE(bit_width, item, builder) \
+ case bit_width: { \
+ arrow::Decimal##bit_width out; \
+ RETURN_NOT_OK(PythonDecimalToArrowDecimal((item), &out)); \
+ RETURN_NOT_OK((builder)->Append(out)); \
+ break; \
+ }
+
+/// Appends the items of a Python sequence to a DecimalBuilder. None becomes a
+/// null; every other item is converted with PythonDecimalToArrowDecimal.
+class DecimalConverter : public TypedConverter<arrow::DecimalBuilder> {
+ public:
+ Status AppendData(PyObject* seq) override {
+ /// Ensure we've allocated enough space
+ Py_ssize_t size = PySequence_Size(seq);
+ /// PySequence_Size reports failure as -1 with a Python error set
+ RETURN_IF_PYERROR();
+ RETURN_NOT_OK(typed_builder_->Reserve(size));
+
+ /// Can the compiler figure out that the case statement below isn't necessary
+ /// once we're running?
+ const auto decimal_type =
+ std::dynamic_pointer_cast<arrow::DecimalType>(typed_builder_->type());
+ if (decimal_type == nullptr) {
+ return Status::Invalid("DecimalConverter requires a builder of decimal type");
+ }
+ const int bit_width = decimal_type->bit_width();
+
+ OwnedRef ref;
+ PyObject* item = nullptr;
+ for (int64_t i = 0; i < size; ++i) {
+ ref.reset(PySequence_GetItem(seq, i));
+ /// PySequence_GetItem returns NULL with a Python error set on failure
+ RETURN_IF_PYERROR();
+ item = ref.obj();
+
+ /// TODO(phillipc): Check for nan?
+ if (item != Py_None) {
+ switch (bit_width) {
+ DECIMAL_CONVERT_CASE(32, item, typed_builder_)
+ DECIMAL_CONVERT_CASE(64, item, typed_builder_)
+ DECIMAL_CONVERT_CASE(128, item, typed_builder_)
+ default:
+ /// Skipping the append would leave the output shorter than the input
+ return Status::Invalid("Unsupported decimal bit width");
+ }
+ RETURN_IF_PYERROR();
+ } else {
+ RETURN_NOT_OK(typed_builder_->AppendNull());
+ }
+ }
+
+ return Status::OK();
+ }
+};
+
+#undef DECIMAL_CONVERT_CASE
+
// Dynamic constructor for sequence converters
std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
switch (type->type) {
@@ -516,6 +565,9 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
return std::make_shared<UTF8Converter>();
case Type::LIST:
return std::make_shared<ListConverter>();
+ case Type::DECIMAL: {
+ return std::make_shared<DecimalConverter>();
+ }
case Type::STRUCT:
default:
return nullptr;
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/builtin_convert.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 00ff0fd..3c2e350 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -25,7 +25,7 @@
#include <memory>
-#include <arrow/type.h>
+#include "arrow/type.h"
#include "arrow/util/visibility.h"
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/common.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/common.h b/cpp/src/arrow/python/common.h
index 32bfa78..a6806ab 100644
--- a/cpp/src/arrow/python/common.h
+++ b/cpp/src/arrow/python/common.h
@@ -57,12 +57,13 @@ class OwnedRef {
}
void reset(PyObject* obj) {
- if (obj_ != nullptr) { Py_XDECREF(obj_); }
+ /// TODO(phillipc): Should we acquire the GIL here? It definitely needs to be
+ /// acquired,
+ /// but callers have probably already acquired it
+ Py_XDECREF(obj_);
obj_ = obj;
}
- void release() { obj_ = nullptr; }
-
PyObject* obj() const { return obj_; }
private:
@@ -72,6 +73,7 @@ class OwnedRef {
struct PyObjectStringify {
OwnedRef tmp_obj;
const char* bytes;
+ Py_ssize_t size;
explicit PyObjectStringify(PyObject* obj) {
PyObject* bytes_obj;
@@ -82,6 +84,7 @@ struct PyObjectStringify {
bytes_obj = obj;
}
bytes = PyBytes_AsString(bytes_obj);
+ size = PyBytes_GET_SIZE(bytes_obj);
}
};
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/helpers.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.cc b/cpp/src/arrow/python/helpers.cc
index be5f412..ffba7bb 100644
--- a/cpp/src/arrow/python/helpers.cc
+++ b/cpp/src/arrow/python/helpers.cc
@@ -16,6 +16,8 @@
// under the License.
#include "arrow/python/helpers.h"
+#include "arrow/python/common.h"
+#include "arrow/util/decimal.h"
#include <arrow/api.h>
@@ -52,5 +54,82 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
}
}
+// Imports the Python module `module_name` while holding the GIL and hands the
+// resulting object to `ref`. Returns early if a Python error is pending.
+Status ImportModule(const std::string& module_name, OwnedRef* ref) {
+ PyAcquireGIL lock;
+ const char* c_name = module_name.c_str();
+ PyObject* imported = PyImport_ImportModule(c_name);
+ RETURN_IF_PYERROR();
+ ref->reset(imported);
+ return Status::OK();
+}
+
+// Looks up attribute `name` on an already imported module while holding the
+// GIL and hands the resulting object to `ref`.
+Status ImportFromModule(const OwnedRef& module, const std::string& name, OwnedRef* ref) {
+ /// Assumes that ImportModule was called first
+ DCHECK_NE(module.obj(), nullptr) << "Cannot import from nullptr Python module";
+
+ PyAcquireGIL lock;
+ const char* attr_name = name.c_str();
+ PyObject* found = PyObject_GetAttrString(module.obj(), attr_name);
+ RETURN_IF_PYERROR();
+ ref->reset(found);
+ return Status::OK();
+}
+
+// Converts a Python object to an Arrow decimal by taking str(python_decimal)
+// and parsing the resulting text with FromString.
+template <typename T>
+Status PythonDecimalToArrowDecimal(PyObject* python_decimal, Decimal<T>* arrow_decimal) {
+ // Equivalent of Python's str(decimal_object)
+ OwnedRef py_text(PyObject_Str(python_decimal));
+ RETURN_IF_PYERROR();
+
+ // Expose the text as raw bytes plus a length
+ PyObjectStringify encoded(py_text.obj());
+ RETURN_IF_PYERROR();
+
+ const char* text = encoded.bytes;
+ DCHECK_NE(text, nullptr);
+
+ const Py_ssize_t text_size = encoded.size;
+ return FromString(std::string(text, text_size), arrow_decimal);
+}
+
+template Status PythonDecimalToArrowDecimal(
+ PyObject* python_decimal, Decimal32* arrow_decimal);
+template Status PythonDecimalToArrowDecimal(
+ PyObject* python_decimal, Decimal64* arrow_decimal);
+template Status PythonDecimalToArrowDecimal(
+ PyObject* python_decimal, Decimal128* arrow_decimal);
+
+// Derives precision and scale from the text of str(python_decimal) by passing
+// it to FromString with a null output decimal.
+Status InferDecimalPrecisionAndScale(
+ PyObject* python_decimal, int* precision, int* scale) {
+ // Call Python's str(decimal_object)
+ OwnedRef str_obj(PyObject_Str(python_decimal));
+ RETURN_IF_PYERROR();
+ PyObjectStringify str(str_obj.obj());
+ // Same check as PythonDecimalToArrowDecimal: stop before touching the bytes
+ // if extracting them raised a Python error
+ RETURN_IF_PYERROR();
+
+ const char* bytes = str.bytes;
+ DCHECK_NE(bytes, nullptr);
+
+ auto size = str.size;
+
+ std::string c_string(bytes, size);
+ return FromString(c_string, static_cast<Decimal32*>(nullptr), precision, scale);
+}
+
+// Calls `decimal_constructor` with `decimal_string` as its only argument and
+// stores the resulting object in *out. Returns early if the call raised.
+Status DecimalFromString(
+ PyObject* decimal_constructor, const std::string& decimal_string, PyObject** out) {
+ DCHECK_NE(decimal_constructor, nullptr);
+ DCHECK_NE(out, nullptr);
+ DCHECK_GT(decimal_string.size(), 0);
+
+ // Use the NUL-terminated "s" format. The "s#" form reads its length argument
+ // as int or Py_ssize_t depending on PY_SSIZE_T_CLEAN, and a size_t passed
+ // through varargs is not guaranteed to match either.
+ *out = PyObject_CallFunction(
+ decimal_constructor, const_cast<char*>("s"), decimal_string.c_str());
+ RETURN_IF_PYERROR();
+ return Status::OK();
+}
+
} // namespace py
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/helpers.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/helpers.h b/cpp/src/arrow/python/helpers.h
index 611e814..a19b25f 100644
--- a/cpp/src/arrow/python/helpers.h
+++ b/cpp/src/arrow/python/helpers.h
@@ -18,16 +18,38 @@
#ifndef PYARROW_HELPERS_H
#define PYARROW_HELPERS_H
+#include <Python.h>
+
#include <memory>
+#include <string>
+#include <utility>
#include "arrow/type.h"
#include "arrow/util/visibility.h"
namespace arrow {
+
+template <typename T>
+struct Decimal;
+
namespace py {
-ARROW_EXPORT
-std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+class OwnedRef;
+
+ARROW_EXPORT std::shared_ptr<DataType> GetPrimitiveType(Type::type type);
+
+Status ImportModule(const std::string& module_name, OwnedRef* ref);
+Status ImportFromModule(
+ const OwnedRef& module, const std::string& module_name, OwnedRef* ref);
+
+template <typename T>
+Status PythonDecimalToArrowDecimal(PyObject* python_decimal, Decimal<T>* arrow_decimal);
+
+Status InferDecimalPrecisionAndScale(
+ PyObject* python_decimal, int* precision = nullptr, int* scale = nullptr);
+
+Status DecimalFromString(
+ PyObject* decimal_constructor, const std::string& decimal_string, PyObject** out);
} // namespace py
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/pandas_convert.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/pandas_convert.cc b/cpp/src/arrow/python/pandas_convert.cc
index 48d3489..f6e627e 100644
--- a/cpp/src/arrow/python/pandas_convert.cc
+++ b/cpp/src/arrow/python/pandas_convert.cc
@@ -41,12 +41,14 @@
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit-util.h"
+#include "arrow/util/decimal.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/python/builtin_convert.h"
#include "arrow/python/common.h"
#include "arrow/python/config.h"
+#include "arrow/python/helpers.h"
#include "arrow/python/numpy-internal.h"
#include "arrow/python/numpy_convert.h"
#include "arrow/python/type_traits.h"
@@ -375,6 +377,7 @@ class PandasConverter : public TypeVisitor {
Status ConvertDates();
Status ConvertLists(const std::shared_ptr<DataType>& type);
Status ConvertObjects();
+ Status ConvertDecimals();
protected:
MemoryPool* pool_;
@@ -468,15 +471,14 @@ Status InvalidConversion(PyObject* obj, const std::string& expected_type_name) {
RETURN_IF_PYERROR();
DCHECK_NE(type_name.obj(), nullptr);
- OwnedRef bytes_obj(PyUnicode_AsUTF8String(type_name.obj()));
+ PyObjectStringify bytestring(type_name.obj());
RETURN_IF_PYERROR();
- DCHECK_NE(bytes_obj.obj(), nullptr);
-
- Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj.obj());
- const char* bytes = PyBytes_AS_STRING(bytes_obj.obj());
+ const char* bytes = bytestring.bytes;
DCHECK_NE(bytes, nullptr) << "bytes from type(...).__name__ were null";
+ Py_ssize_t size = bytestring.size;
+
std::string cpp_type_name(bytes, size);
std::stringstream ss;
@@ -517,6 +519,59 @@ Status PandasConverter::ConvertDates() {
return date_builder.Finish(&out_);
}
+// Expands to one switch case that converts `object` to a Decimal of the given
+// bit width and appends it to `builder`, returning early on failure.
+#define CONVERT_DECIMAL_CASE(bit_width, builder, object) \
+ case bit_width: { \
+ Decimal##bit_width d; \
+ RETURN_NOT_OK(PythonDecimalToArrowDecimal((object), &d)); \
+ RETURN_NOT_OK((builder).Append(d)); \
+ break; \
+ }
+
+// Converts an object array of decimal.Decimal values (with nulls) into a
+// decimal Arrow array stored in out_. Precision and scale are inferred from the
+// first decimal.Decimal found; any non-null, non-Decimal object is an error.
+Status PandasConverter::ConvertDecimals() {
+ PyAcquireGIL lock;
+
+ // Import the decimal module and Decimal class
+ OwnedRef decimal;
+ OwnedRef Decimal;
+ RETURN_NOT_OK(ImportModule("decimal", &decimal));
+ RETURN_NOT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
+
+ PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+
+ // Find the first real Decimal instead of reading objects[0] blindly: the
+ // array may be empty, or may start with nulls.
+ PyObject* first_decimal = nullptr;
+ for (int64_t i = 0; i < length_; ++i) {
+ const int is_decimal = PyObject_IsInstance(objects[i], Decimal.obj());
+ RETURN_IF_PYERROR();
+ if (is_decimal == 1) {
+ first_decimal = objects[i];
+ break;
+ }
+ }
+
+ if (first_decimal != nullptr) {
+ int precision;
+ int scale;
+ RETURN_NOT_OK(InferDecimalPrecisionAndScale(first_decimal, &precision, &scale));
+ type_ = std::make_shared<DecimalType>(precision, scale);
+ } else if (type_ == nullptr || type_->type != Type::DECIMAL) {
+ // Nothing to infer from and no decimal type was supplied
+ return Status::Invalid(
+ "Cannot infer decimal precision and scale without a decimal.Decimal value");
+ }
+
+ const int bit_width = std::dynamic_pointer_cast<DecimalType>(type_)->bit_width();
+ DecimalBuilder decimal_builder(pool_, type_);
+
+ RETURN_NOT_OK(decimal_builder.Resize(length_));
+
+ for (int64_t i = 0; i < length_; ++i) {
+ PyObject* object = objects[i];
+ // PyObject_IsInstance returns -1 on error, which must not count as a match
+ const int is_decimal = PyObject_IsInstance(object, Decimal.obj());
+ RETURN_IF_PYERROR();
+ if (is_decimal == 1) {
+ switch (bit_width) {
+ CONVERT_DECIMAL_CASE(32, decimal_builder, object)
+ CONVERT_DECIMAL_CASE(64, decimal_builder, object)
+ CONVERT_DECIMAL_CASE(128, decimal_builder, object)
+ default:
+ // Skipping the append would leave the output shorter than the input
+ return Status::Invalid("Unsupported decimal bit width");
+ }
+ } else if (PyObject_is_null(object)) {
+ RETURN_NOT_OK(decimal_builder.AppendNull());
+ } else {
+ return InvalidConversion(object, "decimal.Decimal");
+ }
+ }
+ return decimal_builder.Finish(&out_);
+}
+
+#undef CONVERT_DECIMAL_CASE
+
Status PandasConverter::ConvertObjectStrings() {
PyAcquireGIL lock;
@@ -554,6 +609,90 @@ Status PandasConverter::ConvertObjectFixedWidthBytes(
return Status::OK();
}
+// Returns OK when `precision` lies in [1, DecimalPrecision<T>::maximum];
+// otherwise returns Status::Invalid with a message naming the allowed range.
+template <typename T>
+Status validate_precision(int precision) {
+ constexpr static const int maximum_precision = DecimalPrecision<T>::maximum;
+ const bool in_range = precision >= 1 && precision <= maximum_precision;
+ if (in_range) { return Status::OK(); }
+
+ std::stringstream message;
+ message << "Invalid precision: " << precision << ". Minimum is 1, maximum is "
+ << maximum_precision;
+ return Status::Invalid(message.str());
+}
+
+// Renders the raw bytes of a Decimal<T> as text in *result, after checking
+// that `precision` is valid for T. Returns the validation error if it is not.
+template <typename T>
+Status RawDecimalToString(
+ const uint8_t* bytes, int precision, int scale, std::string* result) {
+ DCHECK_NE(bytes, nullptr);
+ DCHECK_NE(result, nullptr);
+ RETURN_NOT_OK(validate_precision<T>(precision));
+ Decimal<T> decimal;
+ // Rebuild the decimal value from its raw storage before formatting it
+ FromBytes(bytes, &decimal);
+ *result = ToString(decimal, precision, scale);
+ return Status::OK();
+}
+
+template Status RawDecimalToString<int32_t>(
+ const uint8_t*, int, int, std::string* result);
+template Status RawDecimalToString<int64_t>(
+ const uint8_t*, int, int, std::string* result);
+
+// 128 bit overload: same as the templated version, but the sign is supplied
+// separately through `is_negative` and forwarded to FromBytes.
+Status RawDecimalToString(const uint8_t* bytes, int precision, int scale,
+ bool is_negative, std::string* result) {
+ DCHECK_NE(bytes, nullptr);
+ DCHECK_NE(result, nullptr);
+ RETURN_NOT_OK(validate_precision<int128_t>(precision));
+ Decimal128 decimal;
+ FromBytes(bytes, is_negative, &decimal);
+ *result = ToString(decimal, precision, scale);
+ return Status::OK();
+}
+
+// Writes one Python object per element of `data` into out_values, advancing the
+// pointer as it goes: None for nulls, otherwise the result of calling
+// decimal.Decimal on the value's string form.
+// NOTE(review): out_values is presumably sized for the total length of all
+// chunks -- confirm against the callers.
+static Status ConvertDecimals(const ChunkedArray& data, PyObject** out_values) {
+ PyAcquireGIL lock;
+ OwnedRef decimal_ref;
+ OwnedRef Decimal_ref;
+ RETURN_NOT_OK(ImportModule("decimal", &decimal_ref));
+ RETURN_NOT_OK(ImportFromModule(decimal_ref, "Decimal", &Decimal_ref));
+ PyObject* Decimal = Decimal_ref.obj();
+
+ for (int c = 0; c < data.num_chunks(); c++) {
+ auto* arr(static_cast<arrow::DecimalArray*>(data.chunk(c).get()));
+ auto type(std::dynamic_pointer_cast<arrow::DecimalType>(arr->type()));
+ const int precision = type->precision;
+ const int scale = type->scale;
+ const int bit_width = type->bit_width();
+
+ for (int64_t i = 0; i < arr->length(); ++i) {
+ if (arr->IsNull(i)) {
+ // The output slot takes its own reference to None
+ Py_INCREF(Py_None);
+ *out_values++ = Py_None;
+ } else {
+ const uint8_t* raw_value = arr->GetValue(i);
+ std::string s;
+ switch (bit_width) {
+ case 32:
+ RETURN_NOT_OK(RawDecimalToString<int32_t>(raw_value, precision, scale, &s));
+ break;
+ case 64:
+ RETURN_NOT_OK(RawDecimalToString<int64_t>(raw_value, precision, scale, &s));
+ break;
+ case 128:
+ // 128 bit values take their sign from the array via IsNegative
+ RETURN_NOT_OK(
+ RawDecimalToString(raw_value, precision, scale, arr->IsNegative(i), &s));
+ break;
+ default:
+ // NOTE(review): an unexpected bit width leaves `s` empty here and the
+ // empty string is then handed to DecimalFromString -- confirm intent.
+ break;
+ }
+ RETURN_NOT_OK(DecimalFromString(Decimal, s, out_values++));
+ }
+ }
+ }
+
+ return Status::OK();
+}
+
Status PandasConverter::ConvertBooleans() {
PyAcquireGIL lock;
@@ -598,6 +737,7 @@ Status PandasConverter::ConvertObjects() {
//
// * Strings
// * Booleans with nulls
+ // * decimal.Decimals
// * Mixed type (not supported at the moment by arrow format)
//
// Additionally, nulls may be encoded either as np.nan or None. So we have to
@@ -613,6 +753,7 @@ Status PandasConverter::ConvertObjects() {
PyDateTime_IMPORT;
}
+ // This means we received an explicit type from the user
if (type_) {
switch (type_->type) {
case Type::STRING:
@@ -627,10 +768,17 @@ Status PandasConverter::ConvertObjects() {
const auto& list_field = static_cast<const ListType&>(*type_);
return ConvertLists(list_field.value_field()->type);
}
+ case Type::DECIMAL:
+ return ConvertDecimals();
default:
return Status::TypeError("No known conversion to Arrow type");
}
} else {
+ OwnedRef decimal;
+ OwnedRef Decimal;
+ RETURN_NOT_OK(ImportModule("decimal", &decimal));
+ RETURN_NOT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
+
for (int64_t i = 0; i < length_; ++i) {
if (PyObject_is_null(objects[i])) {
continue;
@@ -640,6 +788,8 @@ Status PandasConverter::ConvertObjects() {
return ConvertBooleans();
} else if (PyDate_CheckExact(objects[i])) {
return ConvertDates();
+ } else if (PyObject_IsInstance(const_cast<PyObject*>(objects[i]), Decimal.obj())) {
+ return ConvertDecimals();
} else {
return InvalidConversion(
const_cast<PyObject*>(objects[i]), "string, bool, or date");
@@ -847,6 +997,7 @@ class PandasBlock {
INT64,
FLOAT,
DOUBLE,
+ DECIMAL,
BOOL,
DATETIME,
DATETIME_WITH_TZ,
@@ -1193,6 +1344,8 @@ class ObjectBlock : public PandasBlock {
RETURN_NOT_OK(ConvertBinaryLike<StringArray>(data, out_buffer));
} else if (type == Type::FIXED_SIZE_BINARY) {
RETURN_NOT_OK(ConvertFixedSizeBinary(data, out_buffer));
+ } else if (type == Type::DECIMAL) {
+ RETURN_NOT_OK(ConvertDecimals(data, out_buffer));
} else if (type == Type::LIST) {
auto list_type = std::static_pointer_cast<ListType>(col->type());
switch (list_type->value_type()->type) {
@@ -1519,6 +1672,7 @@ Status MakeBlock(PandasBlock::type type, int64_t num_rows, int num_columns,
BLOCK_CASE(DOUBLE, Float64Block);
BLOCK_CASE(BOOL, BoolBlock);
BLOCK_CASE(DATETIME, DatetimeBlock);
+ BLOCK_CASE(DECIMAL, ObjectBlock);
default:
return Status::NotImplemented("Unsupported block type");
}
@@ -1649,6 +1803,9 @@ class DataFrameBlockCreator {
case Type::DICTIONARY:
output_type = PandasBlock::CATEGORICAL;
break;
+ case Type::DECIMAL:
+ output_type = PandasBlock::DECIMAL;
+ break;
default:
return Status::NotImplemented(col->type()->ToString());
}
@@ -1892,6 +2049,7 @@ class ArrowDeserializer {
CONVERT_CASE(TIMESTAMP);
CONVERT_CASE(DICTIONARY);
CONVERT_CASE(LIST);
+ CONVERT_CASE(DECIMAL);
default: {
std::stringstream ss;
ss << "Arrow type reading not implemented for " << col_->type()->ToString();
@@ -1999,6 +2157,13 @@ class ArrowDeserializer {
return ConvertFixedSizeBinary(data_, out_values);
}
+ // Decimal columns: allocate an NPY_OBJECT output array and fill it through
+ // ConvertDecimals.
+ template <int TYPE>
+ inline typename std::enable_if<TYPE == Type::DECIMAL, Status>::type ConvertValues() {
+ RETURN_NOT_OK(AllocateOutput(NPY_OBJECT));
+ auto out_values = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+ return ConvertDecimals(data_, out_values);
+ }
+
#define CONVERTVALUES_LISTSLIKE_CASE(ArrowType, ArrowEnum) \
case Type::ArrowEnum: \
return ConvertListsLike<ArrowType>(col_, out_values);
@@ -2021,6 +2186,7 @@ class ArrowDeserializer {
CONVERTVALUES_LISTSLIKE_CASE(FloatType, FLOAT)
CONVERTVALUES_LISTSLIKE_CASE(DoubleType, DOUBLE)
CONVERTVALUES_LISTSLIKE_CASE(StringType, STRING)
+ CONVERTVALUES_LISTSLIKE_CASE(DecimalType, DECIMAL)
default: {
std::stringstream ss;
ss << "Not implemented type for lists: " << list_type->value_type()->ToString();
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/python/python-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index f269ebf..b63d2ff 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -28,8 +28,11 @@
#include "arrow/python/builtin_convert.h"
#include "arrow/python/common.h"
+#include "arrow/python/helpers.h"
#include "arrow/python/pandas_convert.h"
+#include "arrow/util/decimal.h"
+
namespace arrow {
namespace py {
@@ -37,6 +40,36 @@ TEST(PyBuffer, InvalidInputObject) {
PyBuffer buffer(Py_None);
}
+// Builds a Python decimal.Decimal from a negative 23-digit integer string,
+// converts it with PythonDecimalToArrowDecimal, and checks the Decimal128 value
+// against an int128_t constructed from the same string.
+TEST(DecimalTest, TestPythonDecimalToArrowDecimal128) {
+ PyAcquireGIL lock;
+
+ OwnedRef decimal;
+ OwnedRef Decimal;
+ ASSERT_OK(ImportModule("decimal", &decimal));
+ ASSERT_NE(decimal.obj(), nullptr);
+
+ ASSERT_OK(ImportFromModule(decimal, "Decimal", &Decimal));
+ ASSERT_NE(Decimal.obj(), nullptr);
+
+ std::string decimal_string("-39402950693754869342983");
+ // NOTE(review): "s#" is given a size_t length below -- confirm this matches
+ // the integer type the Python C API expects for this format.
+ const char* format = "s#";
+ auto c_string = decimal_string.c_str();
+ ASSERT_NE(c_string, nullptr);
+
+ auto c_string_size = decimal_string.size();
+ ASSERT_GT(c_string_size, 0);
+ OwnedRef pydecimal(PyObject_CallFunction(
+ Decimal.obj(), const_cast<char*>(format), c_string, c_string_size));
+ ASSERT_NE(pydecimal.obj(), nullptr);
+ ASSERT_EQ(PyErr_Occurred(), nullptr);
+
+ Decimal128 arrow_decimal;
+ int128_t boost_decimal(decimal_string);
+ PyObject* obj = pydecimal.obj();
+ ASSERT_OK(PythonDecimalToArrowDecimal(obj, &arrow_decimal));
+ ASSERT_EQ(boost_decimal, arrow_decimal.value);
+}
+
TEST(PandasConversionTest, TestObjectBlockWriteFails) {
StringBuilder builder(default_memory_pool());
const char value[] = {'\xf1', '\0'};
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index abbb626..df4590f 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -17,6 +17,7 @@
#include "arrow/type.h"
+#include <climits>
#include <sstream>
#include <string>
@@ -91,7 +92,7 @@ std::string BinaryType::ToString() const {
}
int FixedSizeBinaryType::bit_width() const {
- return 8 * byte_width();
+ return CHAR_BIT * byte_width();
}
std::string FixedSizeBinaryType::ToString() const {
@@ -380,6 +381,10 @@ std::shared_ptr<Field> field(
return std::make_shared<Field>(name, type, nullable);
}
+// Factory for a DecimalType with the given precision and scale.
+std::shared_ptr<DataType> decimal(int precision, int scale) {
+ std::shared_ptr<DataType> decimal_type = std::make_shared<DecimalType>(precision, scale);
+ return decimal_type;
+}
+
static const BufferDescr kValidityBuffer(BufferType::VALIDITY, 1);
static const BufferDescr kOffsetBuffer(BufferType::OFFSET, 32);
static const BufferDescr kTypeBuffer(BufferType::TYPE, 32);
@@ -402,7 +407,11 @@ std::vector<BufferDescr> BinaryType::GetBufferLayout() const {
}
std::vector<BufferDescr> FixedSizeBinaryType::GetBufferLayout() const {
- return {kValidityBuffer, BufferDescr(BufferType::DATA, byte_width_ * 8)};
+ return {kValidityBuffer, BufferDescr(BufferType::DATA, bit_width())};
+}
+
+// Three buffers: validity, kBooleanBuffer (presumably the sign bitmap -- TODO
+// confirm), then a data buffer whose width is bit_width().
+std::vector<BufferDescr> DecimalType::GetBufferLayout() const {
+ const BufferDescr value_buffer(BufferType::DATA, bit_width());
+ return {kValidityBuffer, kBooleanBuffer, value_buffer};
+}
std::vector<BufferDescr> ListType::GetBufferLayout() const {
@@ -427,9 +436,4 @@ std::string DecimalType::ToString() const {
return s.str();
}
-std::vector<BufferDescr> DecimalType::GetBufferLayout() const {
- // TODO(wesm)
- return {};
-}
-
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 36ab9d8..3a35f56 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -360,6 +360,8 @@ class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType {
explicit FixedSizeBinaryType(int32_t byte_width)
: FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
+ explicit FixedSizeBinaryType(int32_t byte_width, Type::type type_id)
+ : FixedWidthType(type_id), byte_width_(byte_width) {}
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
@@ -399,19 +401,31 @@ struct ARROW_EXPORT StructType : public NestedType {
std::vector<BufferDescr> GetBufferLayout() const override;
};
-struct ARROW_EXPORT DecimalType : public DataType {
+// Storage width in bytes for a decimal of the given precision: 4 bytes for
+// precision 0-9, 8 bytes for 10-18, and 16 bytes for anything else.
+static inline int decimal_byte_width(int precision) {
+ // TODO(phillipc): validate that we can't construct > 128 bit types
+ if (precision < 0 || precision >= 19) { return 16; }
+ return precision < 10 ? 4 : 8;
+}
+
+struct ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
static constexpr Type::type type_id = Type::DECIMAL;
explicit DecimalType(int precision_, int scale_)
- : DataType(Type::DECIMAL), precision(precision_), scale(scale_) {}
- int precision;
- int scale;
-
+ : FixedSizeBinaryType(decimal_byte_width(precision_), Type::DECIMAL),
+ precision(precision_),
+ scale(scale_) {}
+ std::vector<BufferDescr> GetBufferLayout() const override;
Status Accept(TypeVisitor* visitor) const override;
std::string ToString() const override;
static std::string name() { return "decimal"; }
- std::vector<BufferDescr> GetBufferLayout() const override;
+ int precision;
+ int scale;
};
enum class UnionMode : char { SPARSE, DENSE };
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/type_fwd.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 2e27ce9..acf12c3 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -69,6 +69,7 @@ class StructBuilder;
struct DecimalType;
class DecimalArray;
+class DecimalBuilder;
struct UnionType;
class UnionArray;
@@ -146,6 +147,7 @@ std::shared_ptr<DataType> ARROW_EXPORT binary();
std::shared_ptr<DataType> ARROW_EXPORT date32();
std::shared_ptr<DataType> ARROW_EXPORT date64();
+std::shared_ptr<DataType> ARROW_EXPORT decimal(int precision, int scale);
} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 353b638..3e8ea23 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -229,6 +229,13 @@ struct TypeTraits<DoubleType> {
};
template <>
+struct TypeTraits<DecimalType> {
+ using ArrayType = DecimalArray;
+ using BuilderType = DecimalBuilder;
+ constexpr static bool is_parameter_free = false;
+};
+
+template <>
struct TypeTraits<BooleanType> {
using ArrayType = BooleanArray;
using BuilderType = BooleanBuilder;
@@ -289,12 +296,6 @@ struct TypeTraits<DictionaryType> {
constexpr static bool is_parameter_free = false;
};
-template <>
-struct TypeTraits<DecimalType> {
- // using ArrayType = DecimalArray;
- constexpr static bool is_parameter_free = false;
-};
-
// Not all type classes have a c_type
template <typename T>
struct as_void {
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/util/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index c1b6877..054f110 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -22,6 +22,7 @@
# Headers: top level
install(FILES
bit-util.h
+ decimal.h
logging.h
macros.h
random.h
@@ -70,3 +71,4 @@ endif()
ADD_ARROW_TEST(bit-util-test)
ADD_ARROW_TEST(stl-util-test)
+ADD_ARROW_TEST(decimal-test)
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/util/bit-util.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h
index 42afd07..90a1c3e 100644
--- a/cpp/src/arrow/util/bit-util.h
+++ b/cpp/src/arrow/util/bit-util.h
@@ -149,7 +149,6 @@ int64_t ARROW_EXPORT CountSetBits(
bool ARROW_EXPORT BitmapEquals(const uint8_t* left, int64_t left_offset,
const uint8_t* right, int64_t right_offset, int64_t bit_length);
-
} // namespace arrow
#endif // ARROW_UTIL_BIT_UTIL_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/util/decimal-test.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal-test.cc b/cpp/src/arrow/util/decimal-test.cc
new file mode 100644
index 0000000..1e22643
--- /dev/null
+++ b/cpp/src/arrow/util/decimal-test.cc
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+#include "arrow/util/decimal.h"
+
+#include "gtest/gtest.h"
+
+#include "arrow/test-util.h"
+
+namespace arrow {
+
+template <typename T>
+class DecimalTest : public ::testing::Test {  // Typed fixture holding one value in integer and string form
+ public:
+ DecimalTest() : string_value("234.23445") { integer_value.value = 23423445; }
+ Decimal<T> integer_value;
+ std::string string_value;
+};
+
+typedef ::testing::Types<int32_t, int64_t, int128_t> DecimalTypes;
+TYPED_TEST_CASE(DecimalTest, DecimalTypes);
+
+TYPED_TEST(DecimalTest, TestToString) {  // 23423445 at precision 8, scale 5 must format as "234.23445"
+ Decimal<TypeParam> decimal(this->integer_value);
+ int precision = 8;
+ int scale = 5;
+ std::string result = ToString(decimal, precision, scale);
+ ASSERT_EQ(result, this->string_value);
+}
+
+TYPED_TEST(DecimalTest, TestFromString) {  // Parsing "234.23445" must yield 23423445, precision 8, scale 5
+ Decimal<TypeParam> expected(this->integer_value);
+ Decimal<TypeParam> result;
+ int precision, scale;
+ ASSERT_OK(FromString(this->string_value, &result, &precision, &scale));
+ ASSERT_EQ(result.value, expected.value);
+ ASSERT_EQ(precision, 8);
+ ASSERT_EQ(scale, 5);
+}
+
+TEST(DecimalTest, TestStringToInt32) {  // Whole "123" and fractional "456" with sign +1 combine to 123456
+ int32_t value = 0;
+ StringToInteger("123", "456", 1, &value);
+ ASSERT_EQ(value, 123456);
+}
+
+TEST(DecimalTest, TestStringToInt64) {  // Sign -1 negates the combined digits
+ int64_t value = 0;
+ StringToInteger("123456789", "456", -1, &value);
+ ASSERT_EQ(value, -123456789456);
+}
+
+TEST(DecimalTest, TestStringToInt128) {  // Same digit concatenation through the int128_t overload
+ int128_t value = 0;
+ StringToInteger("123456789", "456789123", 1, &value);
+ ASSERT_EQ(value, 123456789456789123);
+}
+
+TEST(DecimalTest, TestFromString128) {  // Integer literal whose magnitude does not fit in int64_t
+ static const std::string string_value("-23049223942343532412");
+ Decimal<int128_t> result(string_value);
+ int128_t expected = -230492239423435324;
+ ASSERT_EQ(result.value, expected * 100 - 12);
+
+ // Sanity check that our number is actually using more than 64 bits
+ ASSERT_NE(result.value, static_cast<int64_t>(result.value));
+}
+
+TEST(DecimalTest, TestFromDecimalString128) {  // Same digits with a decimal point give the same unscaled value
+ static const std::string string_value("-23049223942343.532412");
+ Decimal<int128_t> result(string_value);
+ int128_t expected = -230492239423435324;
+ ASSERT_EQ(result.value, expected * 100 - 12);
+
+ // Sanity check that our number is actually using more than 64 bits
+ ASSERT_NE(result.value, static_cast<int64_t>(result.value));
+}
+
+TEST(DecimalTest, TestDecimal32Precision) {  // int32_t storage covers precisions 1 through 9
+ auto min_precision = DecimalPrecision<int32_t>::minimum;
+ auto max_precision = DecimalPrecision<int32_t>::maximum;
+ ASSERT_EQ(min_precision, 1);
+ ASSERT_EQ(max_precision, 9);
+}
+
+TEST(DecimalTest, TestDecimal64Precision) {  // int64_t storage covers precisions 10 through 18
+ auto min_precision = DecimalPrecision<int64_t>::minimum;
+ auto max_precision = DecimalPrecision<int64_t>::maximum;
+ ASSERT_EQ(min_precision, 10);
+ ASSERT_EQ(max_precision, 18);
+}
+
+TEST(DecimalTest, TestDecimal128Precision) {  // int128_t storage covers precisions 19 through 38
+ auto min_precision = DecimalPrecision<int128_t>::minimum;
+ auto max_precision = DecimalPrecision<int128_t>::maximum;
+ ASSERT_EQ(min_precision, 19);
+ ASSERT_EQ(max_precision, 38);
+}
+
+TEST(DecimalTest, TestDecimal32SignedRoundTrip) {  // ToBytes followed by FromBytes must preserve a negative value
+ Decimal32 expected(std::string("-3402692"));
+
+ uint8_t stack_bytes[4] = {0};
+ uint8_t* bytes = stack_bytes;
+ ToBytes(expected, &bytes);
+
+ Decimal32 result;
+ FromBytes(bytes, &result);
+ ASSERT_EQ(expected.value, result.value);
+}
+
+TEST(DecimalTest, TestDecimal64SignedRoundTrip) {  // Same round trip through the 8-byte overloads
+ Decimal64 expected(std::string("-34034293045.921"));
+
+ uint8_t stack_bytes[8] = {0};
+ uint8_t* bytes = stack_bytes;
+ ToBytes(expected, &bytes);
+
+ Decimal64 result;
+ FromBytes(bytes, &result);
+
+ ASSERT_EQ(expected.value, result.value);
+}
+
+TEST(DecimalTest, TestDecimal128StringAndBytesRoundTrip) {  // Checks string parsing, then the bytes + sign-flag round trip
+ std::string string_value("-340282366920938463463374607431.711455");
+ Decimal128 expected(string_value);
+
+ std::string expected_string_value("-340282366920938463463374607431711455");
+ int128_t expected_underlying_value(expected_string_value);
+
+ ASSERT_EQ(expected.value, expected_underlying_value);
+
+ uint8_t stack_bytes[16] = {0};
+ uint8_t* bytes = stack_bytes;
+ bool is_negative;
+ ToBytes(expected, &bytes, &is_negative);
+
+ ASSERT_TRUE(is_negative);
+
+ Decimal128 result;
+ FromBytes(bytes, is_negative, &result);
+
+ ASSERT_EQ(expected.value, result.value);
+}
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/util/decimal.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc
new file mode 100644
index 0000000..1ac3471
--- /dev/null
+++ b/cpp/src/arrow/util/decimal.cc
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/util/decimal.h"
+
+#include <boost/regex.hpp>
+
+namespace arrow {
+
+static const boost::regex DECIMAL_PATTERN("(\\+?|-?)((0*)(\\d*))(\\.(\\d+))?");
+
+// Parses an optionally signed decimal literal such as "-12.345" into its
+// unscaled integer value.
+//
+// s:         text to parse; must match DECIMAL_PATTERN in full.
+// out:       if non-null, receives the unscaled value (all digits with the
+//            decimal point removed, sign applied).
+// precision: if non-null, receives the number of significant digits, i.e. the
+//            whole-part digits after leading zeros plus every fractional digit.
+// scale:     if non-null, receives the number of fractional digits.
+//
+// Returns Status::Invalid for an empty string, for text that does not match
+// the pattern, and for text that contains no digits at all.
+template <typename T>
+ARROW_EXPORT Status FromString(
+    const std::string& s, Decimal<T>* out, int* precision, int* scale) {
+  if (s.empty()) {
+    return Status::Invalid("Empty string cannot be converted to decimal");
+  }
+  boost::smatch match;
+  if (!boost::regex_match(s, match, DECIMAL_PATTERN)) {
+    std::stringstream ss;
+    ss << "String " << s << " is not a valid decimal string";
+    return Status::Invalid(ss.str());
+  }
+  const int8_t sign = match[1].str() == "-" ? -1 : 1;
+  const std::string leading_zeros = match[3].str();
+  std::string whole_part = match[4].str();
+  std::string fractional_part = match[6].str();
+
+  // Every group in the pattern is optional, so a bare sign ("+", "-") and an
+  // all-zero literal ("0", "000") both match with no significant digits.
+  // StringToInteger requires at least one digit, so handle both cases here.
+  if (whole_part.empty() && fractional_part.empty()) {
+    if (leading_zeros.empty()) {
+      std::stringstream ss;
+      ss << "String " << s << " is not a valid decimal string";
+      return Status::Invalid(ss.str());
+    }
+    // The literal is exactly zero. One digit is needed to represent it, so
+    // report precision 1 rather than the degenerate precision 0.
+    if (scale != nullptr) { *scale = 0; }
+    if (precision != nullptr) { *precision = 1; }
+    if (out != nullptr) { out->value = 0; }
+    return Status::OK();
+  }
+
+  if (scale != nullptr) { *scale = static_cast<int>(fractional_part.size()); }
+  if (precision != nullptr) {
+    *precision =
+        static_cast<int>(whole_part.size()) + static_cast<int>(fractional_part.size());
+  }
+  if (out != nullptr) { StringToInteger(whole_part, fractional_part, sign, &out->value); }
+  return Status::OK();
+}
+
+template ARROW_EXPORT Status FromString(
+ const std::string& s, Decimal32* out, int* precision, int* scale);
+template ARROW_EXPORT Status FromString(
+ const std::string& s, Decimal64* out, int* precision, int* scale);
+template ARROW_EXPORT Status FromString(
+ const std::string& s, Decimal128* out, int* precision, int* scale);
+
+// Combines the digit strings of a decimal literal into a 32-bit unscaled value:
+// (whole * 10^len(fractional) + fractional) * sign.
+//
+// whole/fractional: base-10 digit strings; at least one must be non-empty.
+// sign:             -1 or +1.
+// out:              receives the result; its previous content is ignored.
+//
+// std::stoi throws std::out_of_range if a part does not fit in an int.
+void StringToInteger(
+    const std::string& whole, const std::string& fractional, int8_t sign, int32_t* out) {
+  DCHECK(sign == -1 || sign == 1);
+  DCHECK_NE(out, nullptr);
+  DCHECK(!whole.empty() || !fractional.empty());
+  // Accumulate from zero in a local so the result never depends on whatever
+  // the caller left in *out (which matters when `whole` is empty).
+  int32_t result = 0;
+  if (!whole.empty()) {
+    result = std::stoi(whole, nullptr, 10);
+    // Shift left one decimal digit per fractional digit using integer
+    // arithmetic, avoiding the rounding of a floating-point pow().
+    for (size_t i = 0; i < fractional.size(); ++i) {
+      result *= 10;
+    }
+  }
+  if (!fractional.empty()) { result += std::stoi(fractional, nullptr, 10); }
+  *out = result * sign;
+}
+
+// Combines the digit strings of a decimal literal into a 64-bit unscaled value:
+// (whole * 10^len(fractional) + fractional) * sign.
+//
+// whole/fractional: base-10 digit strings; at least one must be non-empty.
+// sign:             -1 or +1.
+// out:              receives the result; its previous content is ignored.
+//
+// std::stoll throws std::out_of_range if a part does not fit in a long long.
+void StringToInteger(
+    const std::string& whole, const std::string& fractional, int8_t sign, int64_t* out) {
+  DCHECK(sign == -1 || sign == 1);
+  DCHECK_NE(out, nullptr);
+  DCHECK(!whole.empty() || !fractional.empty());
+  // Accumulate from zero in a local so the result never depends on whatever
+  // the caller left in *out (which matters when `whole` is empty).
+  int64_t result = 0;
+  if (!whole.empty()) {
+    result = static_cast<int64_t>(std::stoll(whole, nullptr, 10));
+    // Shift left one decimal digit per fractional digit using integer
+    // arithmetic, avoiding the rounding of a floating-point pow().
+    for (size_t i = 0; i < fractional.size(); ++i) {
+      result *= 10;
+    }
+  }
+  if (!fractional.empty()) { result += std::stoll(fractional, nullptr, 10); }
+  *out = result * sign;
+}
+
+// Combines the digit strings of a decimal literal into a 128-bit unscaled
+// value by concatenating them and parsing the result as one base-10 integer.
+//
+// whole/fractional: base-10 digit strings; at least one must be non-empty.
+// sign:             -1 or +1.
+// out:              receives the result.
+void StringToInteger(
+    const std::string& whole, const std::string& fractional, int8_t sign, int128_t* out) {
+  DCHECK(sign == -1 || sign == 1);
+  DCHECK_NE(out, nullptr);
+  DCHECK(!whole.empty() || !fractional.empty());
+  const std::string digits = whole + fractional;
+  // The cpp_int string constructor treats a leading '0' as an octal prefix,
+  // so "0.0123" (digits "0123") must have its leading zeros removed before
+  // parsing or it would be read in the wrong radix.
+  const size_t first_nonzero = digits.find_first_not_of('0');
+  if (first_nonzero == std::string::npos) {
+    // Nothing but zeros (or no digits at all): the value is zero.
+    *out = 0;
+    return;
+  }
+  *out = int128_t(digits.substr(first_nonzero)) * sign;
+}
+
+// Loads a Decimal32 from 4 bytes holding the value in native byte order.
+void FromBytes(const uint8_t* bytes, Decimal32* decimal) {
+  DCHECK_NE(bytes, nullptr);
+  DCHECK_NE(decimal, nullptr);
+  // memcpy rather than dereferencing a cast pointer: the byte buffer carries
+  // no alignment guarantee for int32_t and must not be read through one.
+  std::memcpy(&decimal->value, bytes, sizeof(decimal->value));
+}
+
+// Loads a Decimal64 from 8 bytes holding the value in native byte order.
+void FromBytes(const uint8_t* bytes, Decimal64* decimal) {
+  DCHECK_NE(bytes, nullptr);
+  DCHECK_NE(decimal, nullptr);
+  // memcpy rather than dereferencing a cast pointer: the byte buffer carries
+  // no alignment guarantee for int64_t and must not be read through one.
+  std::memcpy(&decimal->value, bytes, sizeof(decimal->value));
+}
+
+constexpr static const size_t BYTES_IN_128_BITS = 128 / CHAR_BIT;  // byte count of a 128-bit value
+constexpr static const size_t LIMB_SIZE =  // size in bytes of one limb of the int128_t backend
+ sizeof(std::remove_pointer<int128_t::backend_type::limb_pointer>::type);
+constexpr static const size_t BYTES_PER_LIMB = BYTES_IN_128_BITS / LIMB_SIZE;  // NOTE(review): this quotient is a limb count (limbs per 128 bits), not a byte count
+
+// Loads a Decimal128 from 16 bytes of raw limb data plus a separate sign flag,
+// the representation produced by the Decimal128 overload of ToBytes.
+//
+// bytes:       BYTES_IN_128_BITS bytes copied verbatim into the backend limbs.
+// is_negative: whether the resulting value should be negative.
+// decimal:     receives the value; its previous content is discarded.
+void FromBytes(const uint8_t* bytes, bool is_negative, Decimal128* decimal) {
+  DCHECK_NE(bytes, nullptr);
+  DCHECK_NE(decimal, nullptr);
+
+  // Start from zero so a sign left over from the previous content of *decimal
+  // cannot survive the raw limb copy and invert the negation below.
+  decimal->value = 0;
+
+  auto& decimal_value(decimal->value);
+  int128_t::backend_type& backend(decimal_value.backend());
+  backend.resize(BYTES_PER_LIMB, BYTES_PER_LIMB);
+  std::memcpy(backend.limbs(), bytes, BYTES_IN_128_BITS);
+  if (is_negative) { decimal->value = -decimal->value; }
+}
+
+// Stores a Decimal32 into the 4-byte buffer at *bytes in native byte order.
+void ToBytes(const Decimal32& value, uint8_t** bytes) {
+  // Check the outer pointer before it is dereferenced.
+  DCHECK_NE(bytes, nullptr);
+  DCHECK_NE(*bytes, nullptr);
+  // memcpy rather than storing through a cast pointer: the byte buffer carries
+  // no alignment guarantee for int32_t and must not be written through one.
+  std::memcpy(*bytes, &value.value, sizeof(value.value));
+}
+
+// Stores a Decimal64 into the 8-byte buffer at *bytes in native byte order.
+void ToBytes(const Decimal64& value, uint8_t** bytes) {
+  // Check the outer pointer before it is dereferenced.
+  DCHECK_NE(bytes, nullptr);
+  DCHECK_NE(*bytes, nullptr);
+  // memcpy rather than storing through a cast pointer: the byte buffer carries
+  // no alignment guarantee for int64_t and must not be written through one.
+  std::memcpy(*bytes, &value.value, sizeof(value.value));
+}
+
+void ToBytes(const Decimal128& decimal, uint8_t** bytes, bool* is_negative) {  // Writes the raw limb bytes to *bytes and the sign to *is_negative
+ DCHECK_NE(*bytes, nullptr);  // NOTE(review): dereferences `bytes` without first checking `bytes` itself
+ DCHECK_NE(is_negative, nullptr);
+
+ /// TODO(phillipc): boost multiprecision is unreliable here, int128_t can't be
+ /// roundtripped
+ const auto& backend(decimal.value.backend());
+ auto boost_bytes = reinterpret_cast<const uint8_t*>(backend.limbs());
+ std::memcpy(*bytes, boost_bytes, BYTES_IN_128_BITS);  // always copies 16 bytes of limb storage; presumably the magnitude only, since the sign is reported separately
+ *is_negative = backend.isneg();
+}
+
+} // namespace arrow
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/util/decimal.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/util/decimal.h b/cpp/src/arrow/util/decimal.h
new file mode 100644
index 0000000..46883e3
--- /dev/null
+++ b/cpp/src/arrow/util/decimal.h
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_DECIMAL_H
+#define ARROW_DECIMAL_H
+
+#include <cmath>
+#include <cstdlib>
+#include <iterator>
+#include <regex>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/bit-util.h"
+#include "arrow/util/logging.h"
+
+#include <boost/multiprecision/cpp_int.hpp>
+
+namespace arrow {
+
+using boost::multiprecision::int128_t;
+
+template <typename T>
+struct ARROW_EXPORT Decimal;
+
+ARROW_EXPORT void StringToInteger(
+ const std::string& whole, const std::string& fractional, int8_t sign, int32_t* out);
+ARROW_EXPORT void StringToInteger(
+ const std::string& whole, const std::string& fractional, int8_t sign, int64_t* out);
+ARROW_EXPORT void StringToInteger(
+ const std::string& whole, const std::string& fractional, int8_t sign, int128_t* out);
+
+template <typename T>
+ARROW_EXPORT Status FromString(const std::string& s, Decimal<T>* out,
+ int* precision = nullptr, int* scale = nullptr);
+
+template <typename T>
+struct ARROW_EXPORT Decimal {  // Thin wrapper around an integer of type T holding a decimal's unscaled value
+ Decimal() : value() {}  // value-initializes the underlying integer
+ explicit Decimal(const std::string& s) : value() { FromString(s, this); }  // NOTE(review): the Status returned by FromString is discarded here
+ explicit Decimal(const char* s) : Decimal(std::string(s)) {}  // delegates to the std::string constructor
+ explicit Decimal(const T& value) : value(value) {}  // wraps an already-computed unscaled value
+
+ using value_type = T;
+ value_type value;  // no precision or scale is stored alongside the integer
+};
+
+using Decimal32 = Decimal<int32_t>;
+using Decimal64 = Decimal<int64_t>;
+using Decimal128 = Decimal<int128_t>;
+
+template <typename T>
+struct ARROW_EXPORT DecimalPrecision {};  // Primary template is empty; the bounds live in the specializations
+
+template <>
+struct ARROW_EXPORT DecimalPrecision<int32_t> {  // Precision bounds associated with int32_t storage
+ constexpr static const int minimum = 1;
+ constexpr static const int maximum = 9;
+};
+
+template <>
+struct ARROW_EXPORT DecimalPrecision<int64_t> {  // Precision bounds associated with int64_t storage
+ constexpr static const int minimum = 10;
+ constexpr static const int maximum = 18;
+};
+
+template <>
+struct ARROW_EXPORT DecimalPrecision<int128_t> {  // Precision bounds associated with int128_t storage
+ constexpr static const int minimum = 19;
+ constexpr static const int maximum = 38;
+};
+
+template <typename T>  // Formats an unscaled decimal value as text with `scale` digits after the decimal point
+ARROW_EXPORT std::string ToString(
+ const Decimal<T>& decimal_value, int precision, int scale) {
+ T value = decimal_value.value;
+
+ // Decimal values are sent to clients as strings, so in the interest of
+ // speed the string is created without using stringstream with the
+ // whole/fractional_part().
+ size_t last_char_idx = precision + (scale > 0) // Add a space for decimal place
+ + (scale == precision) // Add a space for leading 0
+ + (value < 0); // Add a space for negative sign
+ std::string str = std::string(last_char_idx, '0');
+ // Start filling in the values in reverse order by taking the last digit
+ // of the value. Use a positive value and worry about the sign later. At this
+ // point last_char_idx equals str.size(), i.e. one past the last character.
+ T remaining_value = value;
+ size_t first_digit_idx = 0;
+ if (value < 0) {
+ remaining_value = -value;  // NOTE(review): negating the minimum value of a fixed-width T would overflow
+ first_digit_idx = 1;
+ }
+ if (scale > 0) {
+ int remaining_scale = scale;
+ do {  // emit exactly `scale` fractional digits, least significant first
+ str[--last_char_idx] = static_cast<char>(
+ (remaining_value % 10) + static_cast<T>('0')); // Ascii offset
+ remaining_value /= 10;
+ } while (--remaining_scale > 0);
+ str[--last_char_idx] = '.';
+ DCHECK_GT(last_char_idx, first_digit_idx) << "Not enough space remaining";
+ }
+ do {  // emit the whole-part digits; always writes at least one digit
+ str[--last_char_idx] =
+ static_cast<char>((remaining_value % 10) + static_cast<T>('0')); // Ascii offset
+ remaining_value /= 10;
+ if (remaining_value == 0) {
+ // Trim any extra leading 0's.
+ if (last_char_idx > first_digit_idx) str.erase(0, last_char_idx - first_digit_idx);
+ break;
+ }
+ // For safety, enforce string length independent of remaining_value.
+ } while (last_char_idx > first_digit_idx);
+ if (value < 0) str[0] = '-';
+ return str;
+}
+
+/// Conversion from raw bytes to a Decimal value
+ARROW_EXPORT void FromBytes(const uint8_t* bytes, Decimal32* value);
+ARROW_EXPORT void FromBytes(const uint8_t* bytes, Decimal64* value);
+ARROW_EXPORT void FromBytes(const uint8_t* bytes, bool is_negative, Decimal128* decimal);
+
+/// Conversion from a Decimal value to raw bytes
+ARROW_EXPORT void ToBytes(const Decimal32& value, uint8_t** bytes);
+ARROW_EXPORT void ToBytes(const Decimal64& value, uint8_t** bytes);
+ARROW_EXPORT void ToBytes(const Decimal128& decimal, uint8_t** bytes, bool* is_negative);
+
+} // namespace arrow
+#endif // ARROW_DECIMAL_H
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/cpp/src/arrow/visitor_inline.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/visitor_inline.h b/cpp/src/arrow/visitor_inline.h
index c61c9f5..29b3db6 100644
--- a/cpp/src/arrow/visitor_inline.h
+++ b/cpp/src/arrow/visitor_inline.h
@@ -93,7 +93,7 @@ inline Status VisitArrayInline(const Array& array, VISITOR* visitor) {
ARRAY_VISIT_INLINE(TimestampType);
ARRAY_VISIT_INLINE(Time32Type);
ARRAY_VISIT_INLINE(Time64Type);
- // ARRAY_VISIT_INLINE(DecimalType);
+ ARRAY_VISIT_INLINE(DecimalType);
ARRAY_VISIT_INLINE(ListType);
ARRAY_VISIT_INLINE(StructType);
ARRAY_VISIT_INLINE(UnionType);
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/format/Schema.fbs
----------------------------------------------------------------------
diff --git a/format/Schema.fbs b/format/Schema.fbs
index ca9c8e6..badc7ea 100644
--- a/format/Schema.fbs
+++ b/format/Schema.fbs
@@ -77,7 +77,9 @@ table Bool {
}
table Decimal {
+ /// Total number of decimal digits
precision: int;
+ /// Number of digits after the decimal point "."
scale: int;
}
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8c52074..7b23cf6 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -71,7 +71,7 @@ from pyarrow.schema import (null, bool_,
uint8, uint16, uint32, uint64,
timestamp, date32, date64,
float16, float32, float64,
- binary, string,
+ binary, string, decimal,
list_, struct, dictionary, field,
DataType, FixedSizeBinaryType,
Field, Schema, schema)
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/array.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxd b/python/pyarrow/array.pxd
index f6aaea2..3ba4871 100644
--- a/python/pyarrow/array.pxd
+++ b/python/pyarrow/array.pxd
@@ -116,6 +116,10 @@ cdef class FixedSizeBinaryArray(Array):
pass
+cdef class DecimalArray(FixedSizeBinaryArray):  # Declaration only: adds no C-level fields to FixedSizeBinaryArray
+ pass
+
+
cdef class ListArray(Array):
pass
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 9f302e0..ee500e6 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -481,6 +481,10 @@ cdef class FixedSizeBinaryArray(Array):
pass
+cdef class DecimalArray(FixedSizeBinaryArray):  # Array class mapped to Type_DECIMAL in _array_classes; adds no behavior of its own
+ pass
+
+
cdef class ListArray(Array):
pass
@@ -602,6 +606,7 @@ cdef dict _array_classes = {
Type_STRING: StringArray,
Type_DICTIONARY: DictionaryArray,
Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
+ Type_DECIMAL: DecimalArray,
}
cdef object box_array(const shared_ptr[CArray]& sp_array):
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/includes/common.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/common.pxd b/python/pyarrow/includes/common.pxd
index ab38ff3..4860334 100644
--- a/python/pyarrow/includes/common.pxd
+++ b/python/pyarrow/includes/common.pxd
@@ -51,6 +51,11 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool IsTypeError()
+cdef extern from "arrow/util/decimal.h" namespace "arrow" nogil:
+ cdef cppclass int128_t:  # Opaque to Cython: no members are declared, so it can only be used as a type name
+ pass
+
+
cdef inline object PyObject_to_object(PyObject* o):
# Cast to "object" increments reference count
cdef object result = <object> o
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 2a0488f..73d96b2 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -39,6 +39,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
Type_FLOAT" arrow::Type::FLOAT"
Type_DOUBLE" arrow::Type::DOUBLE"
+ Type_DECIMAL" arrow::Type::DECIMAL"
+
Type_DATE32" arrow::Type::DATE32"
Type_DATE64" arrow::Type::DATE64"
Type_TIMESTAMP" arrow::Type::TIMESTAMP"
@@ -58,6 +60,11 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
TimeUnit_MICRO" arrow::TimeUnit::MICRO"
TimeUnit_NANO" arrow::TimeUnit::NANO"
+ cdef cppclass Decimal[T]:
+ Decimal(const T&)
+
+ cdef c_string ToString[T](const Decimal[T]&, int, int)
+
cdef cppclass CDataType" arrow::DataType":
Type type
@@ -144,6 +151,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeBinaryType" arrow::FixedSizeBinaryType"(CFixedWidthType):
CFixedSizeBinaryType(int byte_width)
int byte_width()
+ int bit_width()
+
+ cdef cppclass CDecimalType" arrow::DecimalType"(CFixedSizeBinaryType):
+ int precision
+ int scale
+ CDecimalType(int precision, int scale)
cdef cppclass CField" arrow::Field":
c_string name
@@ -212,6 +225,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedSizeBinaryArray" arrow::FixedSizeBinaryArray"(CArray):
const uint8_t* GetValue(int i)
+ cdef cppclass CDecimalArray" arrow::DecimalArray"(CFixedSizeBinaryArray):
+ Decimal[T] Value[T](int i)
+
cdef cppclass CListArray" arrow::ListArray"(CArray):
const int32_t* raw_value_offsets()
int32_t value_offset(int i)
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/scalar.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pxd b/python/pyarrow/scalar.pxd
index d6c3b35..62a5664 100644
--- a/python/pyarrow/scalar.pxd
+++ b/python/pyarrow/scalar.pxd
@@ -20,6 +20,7 @@ from pyarrow.includes.libarrow cimport *
from pyarrow.schema cimport DataType
+
cdef class Scalar:
cdef readonly:
DataType type
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index 1c0790a..f3d9321 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -17,9 +17,10 @@
from pyarrow.schema cimport DataType, box_data_type
+from pyarrow.includes.common cimport int128_t
from pyarrow.compat import frombytes
import pyarrow.schema as schema
-
+import decimal
import datetime
cimport cpython as cp
@@ -64,7 +65,7 @@ cdef class ArrayValue(Scalar):
if hasattr(self, 'as_py'):
return repr(self.as_py())
else:
- return Scalar.__repr__(self)
+ return super(Scalar, self).__repr__()
cdef class BooleanValue(ArrayValue):
@@ -199,6 +200,25 @@ cdef class DoubleValue(ArrayValue):
return ap.Value(self.index)
+cdef class DecimalValue(ArrayValue):
+    """Scalar wrapper for a single element of a decimal array."""
+
+    def as_py(self):
+        """Return the element as a Python ``decimal.Decimal``.
+
+        The C++ value is formatted to a string with the array type's
+        precision and scale, then parsed by ``decimal.Decimal``.
+
+        Raises
+        ------
+        ValueError
+            If the storage bit width of the type is not 32, 64 or 128.
+        """
+        cdef:
+            CDecimalArray* ap = <CDecimalArray*> self.sp_array.get()
+            CDecimalType* t = <CDecimalType*> ap.type().get()
+            int bit_width = t.bit_width()
+            int precision = t.precision
+            int scale = t.scale
+            c_string s
+        if bit_width == 32:
+            s = ToString[int32_t](ap.Value[int32_t](self.index), precision, scale)
+        elif bit_width == 64:
+            s = ToString[int64_t](ap.Value[int64_t](self.index), precision, scale)
+        elif bit_width == 128:
+            s = ToString[int128_t](ap.Value[int128_t](self.index), precision, scale)
+        else:
+            # Without this branch `s` stays empty and decimal.Decimal('')
+            # fails with an error that does not mention the real cause.
+            raise ValueError(
+                'Unsupported decimal bit width: {0}'.format(bit_width))
+        return decimal.Decimal(s.decode('utf8'))
+
+
cdef class StringValue(ArrayValue):
def as_py(self):
@@ -286,6 +306,7 @@ cdef dict _scalar_classes = {
Type_BINARY: BinaryValue,
Type_STRING: StringValue,
Type_FIXED_SIZE_BINARY: FixedSizeBinaryValue,
+ Type_DECIMAL: DecimalValue,
}
cdef object box_scalar(DataType type, const shared_ptr[CArray]& sp_array,
http://git-wip-us.apache.org/repos/asf/arrow/blob/754bcce6/python/pyarrow/schema.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pxd b/python/pyarrow/schema.pxd
index 94d65bf..eceedba 100644
--- a/python/pyarrow/schema.pxd
+++ b/python/pyarrow/schema.pxd
@@ -20,6 +20,7 @@ from pyarrow.includes.libarrow cimport (CDataType,
CDictionaryType,
CTimestampType,
CFixedSizeBinaryType,
+ CDecimalType,
CField, CSchema)
cdef class DataType:
@@ -27,7 +28,7 @@ cdef class DataType:
shared_ptr[CDataType] sp_type
CDataType* type
- cdef init(self, const shared_ptr[CDataType]& type)
+ cdef void init(self, const shared_ptr[CDataType]& type)
cdef class DictionaryType(DataType):
@@ -45,6 +46,11 @@ cdef class FixedSizeBinaryType(DataType):
const CFixedSizeBinaryType* fixed_size_binary_type
+cdef class DecimalType(FixedSizeBinaryType):  # Declaration of the decimal data type wrapper
+ cdef:
+ const CDecimalType* decimal_type  # typed pointer to the C++ decimal type; presumably assigned in init() (TODO confirm in schema.pyx)
+
+
cdef class Field:
cdef:
shared_ptr[CField] sp_field
@@ -55,6 +61,7 @@ cdef class Field:
cdef init(self, const shared_ptr[CField]& field)
+
cdef class Schema:
cdef:
shared_ptr[CSchema] sp_schema
@@ -63,6 +70,7 @@ cdef class Schema:
cdef init(self, const vector[shared_ptr[CField]]& fields)
cdef init_schema(self, const shared_ptr[CSchema]& schema)
+
cdef DataType box_data_type(const shared_ptr[CDataType]& type)
cdef Field box_field(const shared_ptr[CField]& field)
cdef Schema box_schema(const shared_ptr[CSchema]& schema)