You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ks...@apache.org on 2022/05/03 16:58:25 UTC

[arrow] 02/04: ARROW-16442: [Python][Dataset] Fix fragments of ORC Dataset to use FileFragment class

This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch release-8.0.0
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 64d11b48258ac95af753405bb273e3fd5cde523e
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue May 3 13:50:55 2022 +0200

    ARROW-16442: [Python][Dataset] Fix fragments of ORC Dataset to use FileFragment class
    
    Closes #13052 from jorisvandenbossche/ARROW-16442
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Krisztián Szűcs <sz...@gmail.com>
---
 python/pyarrow/_dataset.pyx          | 1 +
 python/pyarrow/tests/test_dataset.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 0abb28c879..9cc93e4e7f 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -840,6 +840,7 @@ cdef class Fragment(_Weakrefable):
             # corresponding subclasses of FileFragment
             'ipc': FileFragment,
             'csv': FileFragment,
+            'orc': FileFragment,
             'parquet': _get_parquet_symbol('ParquetFileFragment'),
         }
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index b8e15c597f..44769b4ec0 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2880,6 +2880,8 @@ def test_orc_format(tempdir, dataset_reader):
     orc.write_table(table, path)
 
     dataset = ds.dataset(path, format=ds.OrcFileFormat())
+    fragments = list(dataset.get_fragments())
+    assert isinstance(fragments[0], ds.FileFragment)
     result = dataset_reader.to_table(dataset)
     result.validate(full=True)
     assert result.equals(table)