You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@arrow.apache.org by "Josh (Jira)" <ji...@apache.org> on 2020/08/26 21:14:00 UTC

[jira] [Created] (ARROW-9866) Incorrect timestamp column filtering

Josh created ARROW-9866:
---------------------------

             Summary: Incorrect timestamp column filtering
                 Key: ARROW-9866
                 URL: https://issues.apache.org/jira/browse/ARROW-9866
             Project: Apache Arrow
          Issue Type: Bug
          Components: Python
    Affects Versions: 1.0.0
            Reporter: Josh


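The following parametrized test reproduces the problem. It writes a single-row table with a timestamp column to Parquet, reads it back with an equality filter on that column, and does so for every pairing of the four timestamps listed in the test: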

 
{code:python}
import io
import itertools

import pandas
import pyarrow
import pyarrow.dataset
import pyarrow.parquet
import pytest
import pytz


@pytest.mark.parametrize(
    "data_date, filter_date",
    itertools.product(
        [
            pandas.Timestamp("2000-01-01 00:00:00"),
            pandas.Timestamp("2000-01-01 00:00:00", tz="UTC"),
            pandas.Timestamp("2000-01-01 00:00:00", tz="US/Eastern"),
            pandas.Timestamp("1999-12-31 19:00:00", tz=pytz.FixedOffset(-300)),
        ],
        repeat=2,
    ),
    ids=lambda x: x.isoformat(),
)
def test_timestsamp_filter(data_date, filter_date):
    """Check equality filtering on a timestamp column read back from Parquet.

    A one-row frame holding ``data_date`` is written to an in-memory Parquet
    file and read back with the filter ``date == filter_date``.  The result
    must be the full frame when pandas considers the two timestamps equal,
    and an empty frame (same columns, zero rows) otherwise.
    """
    data_date = pandas.Timestamp(data_date)
    filter_date = pandas.Timestamp(filter_date)
    df = pandas.DataFrame({"date": [data_date]})
    no_rows = df.iloc[:0, :]

    # Let pandas decide whether the two timestamps are equal.  A TypeError
    # from the comparison is treated as "not equal" (presumably raised by
    # some pandas versions for tz-naive vs tz-aware operands -- unverified).
    try:
        is_match = bool(data_date == filter_date)
    except TypeError:
        is_match = False
    expected = df if is_match else no_rows

    # Round-trip through Parquet in memory, filtering on the way back in.
    buffer = io.BytesIO()
    table = pyarrow.Table.from_pandas(df)
    pyarrow.parquet.write_table(table, buffer)
    date_filter = pyarrow.dataset.field("date") == filter_date
    result = pyarrow.parquet.read_table(buffer, filters=date_filter)
    actual = result.to_pandas()

    pandas.testing.assert_frame_equal(actual, expected)


{code}
 Pytest summary:
{noformat}
=========================== short test summary info ============================
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-2000-01-01T00:00:00+00:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-2000-01-01T00:00:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00+00:00-1999-12-31T19:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[2000-01-01T00:00:00-05:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-2000-01-01T00:00:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-2000-01-01T00:00:00-05:00]
FAILED test_arrow.py::test_timestsamp_filter[1999-12-31T19:00:00-05:00-1999-12-31T19:00:00-05:00]
========================= 9 failed, 7 passed in 0.23s =========================={noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)