You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/21 12:20:23 UTC
[arrow] branch master updated: ARROW-16120: [Python] ParquetDataset deprecation: change Deprecation to FutureWarnings
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3b7c0e2958 ARROW-16120: [Python] ParquetDataset deprecation: change Deprecation to FutureWarnings
3b7c0e2958 is described below
commit 3b7c0e29582c8d7cb5180633569eb274fe9e42a0
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Thu Apr 21 14:20:14 2022 +0200
ARROW-16120: [Python] ParquetDataset deprecation: change Deprecation to FutureWarnings
+ suppressing some additional warnings when running the tests
Closes #12798 from jorisvandenbossche/ARROW-16120
Authored-by: Joris Van den Bossche <jo...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/parquet/__init__.py | 26 +++++++--------
python/pyarrow/tests/parquet/common.py | 25 ++++++++++----
python/pyarrow/tests/parquet/test_basic.py | 2 +-
python/pyarrow/tests/parquet/test_dataset.py | 49 +++++++++++++++-------------
4 files changed, 59 insertions(+), 43 deletions(-)
diff --git a/python/pyarrow/parquet/__init__.py b/python/pyarrow/parquet/__init__.py
index 867babdaf8..f616b04e1d 100644
--- a/python/pyarrow/parquet/__init__.py
+++ b/python/pyarrow/parquet/__init__.py
@@ -1059,7 +1059,7 @@ class ParquetDatasetPiece:
warnings.warn(
"ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will "
"be removed in a future version.",
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
self._init(
path, open_file_func, file_options, row_group, partition_keys)
@@ -1692,7 +1692,7 @@ Examples
"Specifying the 'metadata_nthreads' argument is deprecated as "
"of pyarrow 8.0.0, and the argument will be removed in a "
"future version",
- DeprecationWarning, stacklevel=2,
+ FutureWarning, stacklevel=2,
)
else:
metadata_nthreads = 1
@@ -1742,7 +1742,7 @@ Examples
"specify it in combination with 'use_legacy_dataet=False', "
"but in that case you need to specify a pyarrow.Schema "
"instead of a ParquetSchema.",
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
self._schema = schema
self.split_row_groups = split_row_groups
@@ -1953,7 +1953,7 @@ Examples
" Specify 'use_legacy_dataset=False' while constructing the "
"ParquetDataset, and then use the '.fragments' attribute "
"instead."),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._pieces
@property
@@ -1967,7 +1967,7 @@ Examples
" Specify 'use_legacy_dataset=False' while constructing the "
"ParquetDataset, and then use the '.partitioning' attribute "
"instead."),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._partitions
@property
@@ -1979,7 +1979,7 @@ Examples
"ParquetDataset, and then use the '.schema' attribute "
"instead (which will return an Arrow schema instead of a "
"Parquet schema)."),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._schema
@property
@@ -1989,7 +1989,7 @@ Examples
"""
warnings.warn(
_DEPR_MSG.format("ParquetDataset.memory_map", ""),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._metadata.memory_map
@property
@@ -1999,7 +1999,7 @@ Examples
"""
warnings.warn(
_DEPR_MSG.format("ParquetDataset.read_dictionary", ""),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._metadata.read_dictionary
@property
@@ -2009,7 +2009,7 @@ Examples
"""
warnings.warn(
_DEPR_MSG.format("ParquetDataset.buffer_size", ""),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._metadata.buffer_size
_fs = property(
@@ -2027,7 +2027,7 @@ Examples
" Specify 'use_legacy_dataset=False' while constructing the "
"ParquetDataset, and then use the '.filesystem' attribute "
"instead."),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._metadata.fs
_common_metadata = property(
@@ -2041,7 +2041,7 @@ Examples
"""
warnings.warn(
_DEPR_MSG.format("ParquetDataset.common_metadata", ""),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return self._metadata.common_metadata
@property
@@ -2453,7 +2453,7 @@ class _ParquetDatasetV2:
warnings.warn(
_DEPR_MSG.format("ParquetDataset.pieces",
" Use the '.fragments' attribute instead"),
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
return list(self._dataset.get_fragments())
@property
@@ -2744,7 +2744,7 @@ def read_table(source, columns=None, use_threads=True, metadata=None,
"Passing 'use_legacy_dataset=True' to get the legacy behaviour is "
"deprecated as of pyarrow 8.0.0, and the legacy implementation will "
"be removed in a future version.",
- DeprecationWarning, stacklevel=2)
+ FutureWarning, stacklevel=2)
if ignore_prefixes is not None:
raise ValueError(
diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py
index 551db4a8fe..b11e412e63 100644
--- a/python/pyarrow/tests/parquet/common.py
+++ b/python/pyarrow/tests/parquet/common.py
@@ -23,14 +23,25 @@ import pytest
import pyarrow as pa
from pyarrow.tests import util
+legacy_filter_mark = pytest.mark.filterwarnings(
+ "ignore:Passing 'use_legacy:FutureWarning"
+)
+
parametrize_legacy_dataset = pytest.mark.parametrize(
"use_legacy_dataset",
- [True, pytest.param(False, marks=pytest.mark.dataset)])
+ [pytest.param(True, marks=legacy_filter_mark),
+ pytest.param(False, marks=pytest.mark.dataset)]
+)
parametrize_legacy_dataset_not_supported = pytest.mark.parametrize(
- "use_legacy_dataset", [True, pytest.param(False, marks=pytest.mark.skip)])
+ "use_legacy_dataset",
+ [pytest.param(True, marks=legacy_filter_mark),
+ pytest.param(False, marks=pytest.mark.skip)]
+)
parametrize_legacy_dataset_fixed = pytest.mark.parametrize(
- "use_legacy_dataset", [pytest.param(True, marks=pytest.mark.xfail),
- pytest.param(False, marks=pytest.mark.dataset)])
+ "use_legacy_dataset",
+ [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]),
+ pytest.param(False, marks=pytest.mark.dataset)]
+)
# Marks all of the tests in this module
# Ignore these with pytest ... -m 'not parquet'
@@ -58,7 +69,7 @@ def _read_table(*args, **kwargs):
def _roundtrip_table(table, read_table_kwargs=None,
- write_table_kwargs=None, use_legacy_dataset=True):
+ write_table_kwargs=None, use_legacy_dataset=False):
read_table_kwargs = read_table_kwargs or {}
write_table_kwargs = write_table_kwargs or {}
@@ -70,7 +81,7 @@ def _roundtrip_table(table, read_table_kwargs=None,
def _check_roundtrip(table, expected=None, read_table_kwargs=None,
- use_legacy_dataset=True, **write_table_kwargs):
+ use_legacy_dataset=False, **write_table_kwargs):
if expected is None:
expected = table
@@ -87,7 +98,7 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None,
assert result.equals(expected)
-def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=True):
+def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False):
table = pa.Table.from_pandas(df)
result = _roundtrip_table(
table, write_table_kwargs=write_kwargs,
diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index e82e3a36df..edcfb0dc4c 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -795,6 +795,6 @@ def test_read_table_legacy_deprecated(tempdir):
pq.write_table(table, path)
with pytest.warns(
- DeprecationWarning, match="Passing 'use_legacy_dataset=True'"
+ FutureWarning, match="Passing 'use_legacy_dataset=True'"
):
pq.read_table(path, use_legacy_dataset=True)
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 6326743113..6477132dcd 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -55,7 +55,7 @@ def test_parquet_piece_read(tempdir):
path = tempdir / 'parquet_piece_read.parquet'
_write_table(table, path, version='2.6')
- with pytest.warns(DeprecationWarning):
+ with pytest.warns(FutureWarning):
piece1 = pq.ParquetDatasetPiece(path)
result = piece1.read()
@@ -70,7 +70,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
path = tempdir / 'parquet_piece_read.parquet'
_write_table(table, path, version='2.6')
- with pytest.warns(DeprecationWarning):
+ with pytest.warns(FutureWarning):
piece = pq.ParquetDatasetPiece(path)
table1 = piece.read()
assert isinstance(table1, pa.Table)
@@ -80,7 +80,7 @@ def test_parquet_piece_open_and_get_metadata(tempdir):
assert table.equals(table1)
-@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning")
def test_parquet_piece_basics():
path = '/baz.parq'
@@ -140,7 +140,7 @@ def test_read_partitioned_directory(tempdir, use_legacy_dataset):
_partition_test_for_filesystem(fs, tempdir, use_legacy_dataset)
-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
@pytest.mark.pandas
def test_create_parquet_dataset_multi_threaded(tempdir):
fs = LocalFileSystem._get_instance()
@@ -151,7 +151,7 @@ def test_create_parquet_dataset_multi_threaded(tempdir):
manifest = pq.ParquetManifest(base_path, filesystem=fs,
metadata_nthreads=1)
with pytest.warns(
- DeprecationWarning, match="Specifying the 'metadata_nthreads'"
+ FutureWarning, match="Specifying the 'metadata_nthreads'"
):
dataset = pq.ParquetDataset(
base_path, filesystem=fs, metadata_nthreads=16)
@@ -801,14 +801,14 @@ def _test_read_common_metadata_files(fs, base_path):
@pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
def test_read_common_metadata_files(tempdir):
fs = LocalFileSystem._get_instance()
_test_read_common_metadata_files(fs, tempdir)
@pytest.mark.pandas
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
def test_read_metadata_files(tempdir):
fs = LocalFileSystem._get_instance()
@@ -922,7 +922,7 @@ def test_read_multiple_files(tempdir, use_legacy_dataset):
result2 = read_multiple_files(paths, metadata=metadata)
assert result2.equals(expected)
- with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+ with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
result3 = pq.ParquetDataset(dirpath, schema=metadata.schema).read()
assert result3.equals(expected)
else:
@@ -968,7 +968,7 @@ def test_read_multiple_files(tempdir, use_legacy_dataset):
mixed_paths = [bad_apple_path, paths[0]]
with pytest.raises(ValueError):
- with pytest.warns(DeprecationWarning, match="Specifying the 'schema'"):
+ with pytest.warns(FutureWarning, match="Specifying the 'schema'"):
read_multiple_files(mixed_paths, schema=bad_meta.schema)
with pytest.raises(ValueError):
@@ -1014,7 +1014,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset):
tm.assert_frame_equal(result.reindex(columns=expected.columns), expected)
-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
@pytest.mark.pandas
@parametrize_legacy_dataset
def test_dataset_memory_map(tempdir, use_legacy_dataset):
@@ -1217,7 +1217,7 @@ def test_empty_directory(tempdir, use_legacy_dataset):
assert result.num_columns == 0
-@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning")
def _test_write_to_dataset_with_partitions(base_path,
use_legacy_dataset=True,
filesystem=None,
@@ -1259,7 +1259,7 @@ def _test_write_to_dataset_with_partitions(base_path,
use_legacy_dataset=use_legacy_dataset)
# ARROW-2209: Ensure the dataset schema also includes the partition columns
if use_legacy_dataset:
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
dataset_cols = set(dataset.schema.to_arrow_schema().names)
else:
# NB schema property is an arrow and not parquet schema
@@ -1409,7 +1409,7 @@ def test_write_to_dataset_no_partitions_s3fs(
path, use_legacy_dataset, filesystem=fs)
-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
@pytest.mark.pandas
@parametrize_legacy_dataset_not_supported
def test_write_to_dataset_with_partitions_and_custom_filenames(
@@ -1569,6 +1569,7 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset):
@pytest.mark.dataset
@pytest.mark.pandas
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
def test_read_table_schema(tempdir):
# test that schema keyword is passed through in read_table
table = pa.table({'a': pa.array([1, 2, 3], pa.int32())})
@@ -1622,6 +1623,7 @@ def test_dataset_unsupported_keywords():
@pytest.mark.dataset
+@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning")
def test_dataset_partitioning(tempdir):
import pyarrow.dataset as ds
@@ -1669,7 +1671,7 @@ def test_parquet_dataset_new_filesystem(tempdir):
assert result.equals(table)
-@pytest.mark.filterwarnings("ignore:'ParquetDataset:DeprecationWarning")
+@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning")
def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir):
# ARROW-10462 ensure that on Windows we properly use posix-style paths
# as used by fsspec
@@ -1693,30 +1695,33 @@ def test_parquet_dataset_deprecated_properties(tempdir):
pq.write_table(table, path)
dataset = pq.ParquetDataset(path)
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
dataset.pieces
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.partitions"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"):
dataset.partitions
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.memory_map"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"):
dataset.memory_map
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.read_dictio"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"):
dataset.read_dictionary
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.buffer_size"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"):
dataset.buffer_size
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.fs"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.fs"):
dataset.fs
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.schema'"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"):
dataset.schema
+ with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"):
+ dataset.common_metadata
+
dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False)
- with pytest.warns(DeprecationWarning, match="'ParquetDataset.pieces"):
+ with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"):
dataset2.pieces