You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/21 16:48:25 UTC
[arrow] branch master updated: ARROW-7914: [Python] Allow pandas datetime as index for feather
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 20ec0fda70 ARROW-7914: [Python] Allow pandas datetime as index for feather
20ec0fda70 is described below
commit 20ec0fda708b72e4398e422f8bc3ee8ef0a76528
Author: Salonijain27 <sa...@gmail.com>
AuthorDate: Thu Apr 21 18:48:14 2022 +0200
ARROW-7914: [Python] Allow pandas datetime as index for feather
Closes #12821 from Salonijain27/ARROW-7914_fetch_update
Lead-authored-by: Salonijain27 <sa...@gmail.com>
Co-authored-by: salonijain27 <sa...@Salonis-MacBook-Pro.local>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/feather.py | 12 +++++++++++-
python/pyarrow/tests/test_feather.py | 22 ++++++++++++++++++++--
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index 6824f4ba96..f20302d67b 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -151,7 +151,17 @@ def write_feather(df, dest, compression=None, compression_level=None,
df = df.to_dense()
if _pandas_api.is_data_frame(df):
- table = Table.from_pandas(df, preserve_index=False)
+ # Table.from_pandas stores any index that is not a RangeIndex as a new
+ # column in the resultant Table; Feather v1 therefore drops the index
+
+ if version == 1:
+ preserve_index = False
+ elif version == 2:
+ preserve_index = None
+ else:
+ raise ValueError("Version value should either be 1 or 2")
+
+ table = Table.from_pandas(df, preserve_index=preserve_index)
if version == 1:
# Version 1 does not support chunking
diff --git a/python/pyarrow/tests/test_feather.py b/python/pyarrow/tests/test_feather.py
index 7dab732557..97696fa6a9 100644
--- a/python/pyarrow/tests/test_feather.py
+++ b/python/pyarrow/tests/test_feather.py
@@ -30,7 +30,6 @@ import pyarrow.tests.strategies as past
from pyarrow.feather import (read_feather, write_feather, read_table,
FeatherDataset)
-
try:
from pandas.testing import assert_frame_equal
import pandas as pd
@@ -90,13 +89,18 @@ def _check_pandas_roundtrip(df, expected=None, path=None,
if path is None:
path = random_path()
+ if version is None:
+ version = 2
+
TEST_FILES.append(path)
write_feather(df, path, compression=compression,
compression_level=compression_level, version=version)
+
if not os.path.exists(path):
raise Exception('file not written')
result = read_feather(path, columns, use_threads=use_threads)
+
if expected is None:
expected = df
@@ -504,8 +508,10 @@ def test_out_of_float64_timestamp_with_nulls(version):
def test_non_string_columns(version):
df = pd.DataFrame({0: [1, 2, 3, 4],
1: [True, False, True, False]})
+ expected = df
- expected = df.rename(columns=str)
+ if version == 1:
+ expected = df.rename(columns=str)
_check_pandas_roundtrip(df, expected, version=version)
@@ -820,3 +826,15 @@ def test_feather_v017_experimental_compression_backward_compatibility(datadir):
expected = pa.table({'a': range(5)})
result = read_table(datadir / "v0.17.0.version.2-compression.lz4.feather")
assert result.equals(expected)
+
+
+@pytest.mark.pandas
+def test_preserve_index_pandas(version):
+ df = pd.DataFrame({'a': [1, 2, 3]}, index=['a', 'b', 'c'])
+
+ if version == 1:
+ expected = df.reset_index(drop=True).rename(columns=str)
+ else:
+ expected = df
+
+ _check_pandas_roundtrip(df, expected, version=version)