You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/04/05 13:17:18 UTC

[arrow] branch master updated: ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0015561b35 ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1
0015561b35 is described below

commit 0015561b35f413c4b6c7309f2f86a845860c6e7f
Author: Raúl Cumplido <ra...@gmail.com>
AuthorDate: Tue Apr 5 15:16:41 2022 +0200

    ARROW-13922: [Python] Fix ParquetDataset throw error when len(path_or_paths) == 1
    
    This PR fixes a bug when a list with a single element was used on `ParquetDataset.read()`.
    
    Closes #12797 from raulcd/ARROW-13922
    
    Authored-by: Raúl Cumplido <ra...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/parquet.py                    |  5 +----
 python/pyarrow/tests/parquet/test_dataset.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 8ddc0e75c6..18112b41e6 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1791,10 +1791,7 @@ class _ParquetDatasetV2:
 
         # check for single fragment dataset
         single_file = None
-        if isinstance(path_or_paths, list):
-            if len(path_or_paths) == 1:
-                single_file = path_or_paths[0]
-        else:
+        if not isinstance(path_or_paths, list):
             if _is_path_like(path_or_paths):
                 path_or_paths = _stringify_path(path_or_paths)
                 if filesystem is None:
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index b816aed9d8..2534cce73c 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -639,6 +639,20 @@ def test_read_directory_s3fs(s3_example_s3fs, use_legacy_dataset):
     assert result.equals(table)
 
 
+@pytest.mark.pandas
+@parametrize_legacy_dataset
+def test_read_single_file_list(tempdir, use_legacy_dataset):
+    data_path = str(tempdir / 'data.parquet')
+
+    table = pa.table({"a": [1, 2, 3]})
+    _write_table(table, data_path)
+
+    result = pq.ParquetDataset(
+        [data_path], use_legacy_dataset=use_legacy_dataset
+    ).read()
+    assert result.equals(table)
+
+
 @pytest.mark.pandas
 @pytest.mark.s3
 @parametrize_legacy_dataset