You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Josh (Jira)" <ji...@apache.org> on 2020/08/26 21:14:00 UTC
[jira] [Created] (ARROW-9866) Incorrect timestamp column filtering
Josh created ARROW-9866:
---------------------------
Summary: Incorrect timestamp column filtering
Key: ARROW-9866
URL: https://issues.apache.org/jira/browse/ARROW-9866
Project: Apache Arrow
Issue Type: Bug
Components: Python
Affects Versions: 1.0.0
Reporter: Josh
Here are some sample test cases:
{code:python}
import io
import itertools
import pandas
import pyarrow
import pyarrow.dataset
import pyarrow.parquet
import pytest
import pytz
@pytest.mark.parametrize(
    "data_date, filter_date",
    itertools.product(
        [
            pandas.Timestamp("2000-01-01 00:00:00"),
            pandas.Timestamp("2000-01-01 00:00:00", tz="UTC"),
            pandas.Timestamp("2000-01-01 00:00:00", tz="US/Eastern"),
            pandas.Timestamp("1999-12-31 19:00:00", tz=pytz.FixedOffset(-300)),
        ],
        repeat=2,
    ),
    ids=lambda x: x.isoformat(),
)
def test_timestsamp_filter(data_date, filter_date):
    """Round-trip a one-row frame through Parquet and filter on ``date``.

    The frame holds ``data_date`` in a single ``date`` column. It is written
    to an in-memory Parquet buffer and read back with the filter
    ``date == filter_date``. The row is expected back only when the two
    timestamps compare equal in pandas; in every other case (including a
    comparison that raises ``TypeError``) an empty frame is expected.
    """
    stored = pandas.Timestamp(data_date)
    wanted = pandas.Timestamp(filter_date)
    frame = pandas.DataFrame({"date": [stored]})

    # A comparison that raises TypeError is treated like "not equal".
    try:
        same_instant = bool(stored == wanted)
    except TypeError:
        same_instant = False

    # Zero-row slice that keeps the original column.
    expected = frame if same_instant else frame.iloc[:0, :]

    buffer = io.BytesIO()
    table = pyarrow.Table.from_pandas(frame)
    pyarrow.parquet.write_table(table, buffer)

    predicate = pyarrow.dataset.field("date") == wanted
    actual = pyarrow.parquet.read_table(buffer, filters=predicate).to_pandas()

    pandas.testing.assert_frame_equal(actual, expected)
{code}
Pytest summary:
{noformat}
=========================== short test summary info ============================
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-2000-01-01T00:00:00+00:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-2000-01-01T00:00:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-1999-12-31T19:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-05:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-2000-01-01T00:00:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-1999-12-31T19:00:00-05:00]
========================= 9 failed, 7 passed in 0.23s ==========================
{noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)