You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/03/10 10:54:16 UTC

[GitHub] [arrow] pitrou commented on a change in pull request #12585: ARROW-15871: [Python] Start raising deprecation warnings for ParquetDataset keywords that won't be supported with the new API

pitrou commented on a change in pull request #12585:
URL: https://github.com/apache/arrow/pull/12585#discussion_r823593260



##########
File path: python/pyarrow/parquet.py
##########
@@ -1440,7 +1450,15 @@ def __init__(self, path_or_paths, filesystem=None, schema=None,
         else:
             self.metadata = metadata
 
-        self.schema = schema
+        if schema is not None:
+            warnings.warn(
+                "Specifying the 'schema' keyword with 'use_legacy_dataset="

Review comment:
       Here as well, say "argument" not "keyword".

##########
File path: python/pyarrow/parquet.py
##########
@@ -1394,14 +1394,24 @@ def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
 
     def __init__(self, path_or_paths, filesystem=None, schema=None,
                  metadata=None, split_row_groups=False, validate_schema=True,
-                 filters=None, metadata_nthreads=1, read_dictionary=None,
+                 filters=None, metadata_nthreads=None, read_dictionary=None,
                  memory_map=False, buffer_size=0, partitioning="hive",
                  use_legacy_dataset=True, pre_buffer=True,
                  coerce_int96_timestamp_unit=None):
         if partitioning != "hive":
             raise ValueError(
                 'Only "hive" for hive-like partitioning is supported when '
                 'using use_legacy_dataset=True')
+        if metadata_nthreads is not None:
+            warnings.warn(
+                "Specifying the 'metadata_nthreads' keyword is deprecated as "
+                "of pyarrow 8.0.0, and the keyword will be removed in a "
+                "future version",

Review comment:
       ```suggestion
                   "Specifying the 'metadata_nthreads' argument is deprecated as "
                   "of pyarrow 8.0.0, and the argument will be removed in a "
                   "future version",
   ```

##########
File path: python/pyarrow/parquet.py
##########
@@ -1394,14 +1394,24 @@ def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
 
     def __init__(self, path_or_paths, filesystem=None, schema=None,
                  metadata=None, split_row_groups=False, validate_schema=True,
-                 filters=None, metadata_nthreads=1, read_dictionary=None,
+                 filters=None, metadata_nthreads=None, read_dictionary=None,
                  memory_map=False, buffer_size=0, partitioning="hive",
                  use_legacy_dataset=True, pre_buffer=True,
                  coerce_int96_timestamp_unit=None):
         if partitioning != "hive":
             raise ValueError(
                 'Only "hive" for hive-like partitioning is supported when '
                 'using use_legacy_dataset=True')
+        if metadata_nthreads is not None:
+            warnings.warn(
+                "Specifying the 'metadata_nthreads' keyword is deprecated as "
+                "of pyarrow 8.0.0, and the keyword will be removed in a "
+                "future version",

Review comment:
       (may need reformatting after this change?)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org