You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2018/08/01 20:32:58 UTC
[arrow] branch master updated: ARROW-2911: [Python] Parquet binary statistics that end in '\0' truncate last byte
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cf79192 ARROW-2911: [Python] Parquet binary statistics that end in '\0' truncate last byte
cf79192 is described below
commit cf79192ebc2bf54c68b7b56dc938d91f72441b96
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Wed Aug 1 22:32:45 2018 +0200
ARROW-2911: [Python] Parquet binary statistics that end in '\0' truncate last byte
Depends on https://github.com/apache/parquet-cpp/pull/479
Merge once parquet-cpp 1.5.0 is released.
Author: Korn, Uwe <Uw...@blue-yonder.com>
Closes #2326 from xhochy/ARROW-2911 and squashes the following commits:
50fa61d <Korn, Uwe> Remove deprecated version of FormatStatValue
bb2a4fa <Korn, Uwe> ARROW-2911: Parquet binary statistics that end in '\0' truncate last byte
---
python/pyarrow/_parquet.pxd | 2 +-
python/pyarrow/_parquet.pyx | 4 ++--
python/pyarrow/tests/test_parquet.py | 4 ++++
3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd
index ca20ce2..564391e 100644
--- a/python/pyarrow/_parquet.pxd
+++ b/python/pyarrow/_parquet.pxd
@@ -134,7 +134,7 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil:
c_bool Equals(const SchemaDescriptor& other)
int num_columns()
- cdef c_string FormatStatValue(ParquetType parquet_type, const char* val)
+ cdef c_string FormatStatValue(ParquetType parquet_type, c_string val)
cdef extern from "parquet/api/reader.h" namespace "parquet" nogil:
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx
index 1aa2124..7b97d06 100644
--- a/python/pyarrow/_parquet.pyx
+++ b/python/pyarrow/_parquet.pyx
@@ -99,7 +99,7 @@ cdef class RowGroupStatistics:
raw_physical_type = self.statistics.get().physical_type()
encode_min = self.statistics.get().EncodeMin()
- min_value = FormatStatValue(raw_physical_type, encode_min.c_str())
+ min_value = FormatStatValue(raw_physical_type, encode_min)
return self._cast_statistic(min_value)
@property
@@ -107,7 +107,7 @@ cdef class RowGroupStatistics:
raw_physical_type = self.statistics.get().physical_type()
encode_max = self.statistics.get().EncodeMax()
- max_value = FormatStatValue(raw_physical_type, encode_max.c_str())
+ max_value = FormatStatValue(raw_physical_type, encode_max)
return self._cast_statistic(max_value)
@property
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 1d3a6c1..324ae16 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -630,6 +630,10 @@ def test_parquet_metadata_api():
[True, False, False, True, True], pa.bool_(),
'BOOLEAN', False, True, 0, 5, 0
),
+ (
+ [b'\x00', b'b', b'12', None, b'aaa'], pa.binary(),
+ 'BYTE_ARRAY', b'\x00', b'b', 1, 4, 0
+ ),
]
)
def test_parquet_column_statistics_api(data, type, physical_type, min_value,