You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/05 13:17:18 UTC
[arrow] branch master updated: ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0015561b35 ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1
0015561b35 is described below
commit 0015561b35f413c4b6c7309f2f86a845860c6e7f
Author: Raúl Cumplido <ra...@gmail.com>
AuthorDate: Tue Apr 5 15:16:41 2022 +0200
ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1
This PR fixes a bug when a list with a single element was used on `ParquetDataset.read()`.
Closes #12797 from raulcd/ARROW-13922
Authored-by: Raúl Cumplido <ra...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/parquet.py | 5 +----
python/pyarrow/tests/parquet/test_dataset.py | 14 ++++++++++++++
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 8ddc0e75c6..18112b41e6 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1791,10 +1791,7 @@ class _ParquetDatasetV2:
# check for single fragment dataset
single_file = None
- if isinstance(path_or_paths, list):
- if len(path_or_paths) == 1:
- single_file = path_or_paths[0]
- else:
+ if not isinstance(path_or_paths, list):
if _is_path_like(path_or_paths):
path_or_paths = _stringify_path(path_or_paths)
if filesystem is None:
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index b816aed9d8..2534cce73c 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -639,6 +639,20 @@ def test_read_directory_s3fs(s3_example_s3fs, use_legacy_dataset):
assert result.equals(table)
+@pytest.mark.pandas
+@parametrize_legacy_dataset
+def test_read_single_file_list(tempdir, use_legacy_dataset):
+ data_path = str(tempdir / 'data.parquet')
+
+ table = pa.table({"a": [1, 2, 3]})
+ _write_table(table, data_path)
+
+ result = pq.ParquetDataset(
+ [data_path], use_legacy_dataset=use_legacy_dataset
+ ).read()
+ assert result.equals(table)
+
+
@pytest.mark.pandas
@pytest.mark.s3
@parametrize_legacy_dataset