You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/13 09:44:16 UTC

[GitHub] [arrow] jorisvandenbossche commented on a change in pull request #12076: ARROW-10317: [Python] Document compute function options

jorisvandenbossche commented on a change in pull request #12076:
URL: https://github.com/apache/arrow/pull/12076#discussion_r783769781



##########
File path: python/pyarrow/compute.py
##########
@@ -105,66 +117,70 @@ def _decorate_compute_function(wrapper, exposed_name, func, options_class):
     doc_pieces = []
 
     # 1. One-line summary
-    cpp_doc = func._doc
     summary = cpp_doc.summary
     if not summary:
         arg_str = "arguments" if func.arity > 1 else "argument"
         summary = ("Call compute function {!r} with the given {}"
                    .format(func.name, arg_str))
 
-    description = cpp_doc.description
-    arg_names = _get_arg_names(func)
-
-    doc_pieces.append("""\
-        {}.
-
-        """.format(summary))
+    doc_pieces.append(f"{summary}.\n\n")
 
     # 2. Multi-line description
+    description = cpp_doc.description
     if description:
-        doc_pieces.append("{}\n\n".format(description))
+        doc_pieces.append(f"{description}\n\n")
 
     doc_addition = _compute_docstrings.function_doc_additions.get(func.name)
 
     # 3. Parameter description
-    doc_pieces.append("""\
+    doc_pieces.append(dedent("""\
         Parameters
         ----------
-        """)
+        """))
 
+    # 3a. Compute function parameters
+    arg_names = _get_arg_names(func)
     for arg_name in arg_names:
         if func.kind in ('vector', 'scalar_aggregate'):
             arg_type = 'Array-like'
         else:
             arg_type = 'Array-like or scalar-like'
-        doc_pieces.append("""\
-            {} : {}
-                Argument to compute function
-            """.format(arg_name, arg_type))
+        doc_pieces.append(f"{arg_name} : {arg_type}\n")
+        doc_pieces.append("    Argument to compute function.\n")
 
+    # 3b. Compute function option values
     if options_class is not None:
-        options_sig = inspect.signature(options_class)
-        for p in options_sig.parameters.values():
-            doc_pieces.append("""\
-            {0} : optional
-                Parameter for {1} constructor. Either `options`
-                or `{0}` can be passed, but not both at the same time.
-            """.format(p.name, options_class.__name__))
-        doc_pieces.append("""\
-            options : pyarrow.compute.{0}, optional
-                Parameters altering compute function semantics.
-            """.format(options_class.__name__))
-
-    doc_pieces.append("""\
+        options_class_doc = _scrape_options_class_doc(options_class)
+        if options_class_doc:
+            for p in options_class_doc.params:
+                doc_pieces.append(f"{p.name} : {p.type}\n")
+                for s in p.desc:
+                    doc_pieces.append(f"    {s}\n")
+        else:
+            warnings.warn(f"Options class {options_class.__name__} "
+                          f"does not have a docstring", RuntimeWarning)
+            options_sig = inspect.signature(options_class)
+            for p in options_sig.parameters.values():
+                doc_pieces.append(dedent("""\
+                {0} : optional
+                    Parameter for {1} constructor. Either `options`
+                    or `{0}` can be passed, but not both at the same time.
+                """.format(p.name, options_class.__name__)))
+        doc_pieces.append(dedent(f"""\
+            options : pyarrow.compute.{options_class.__name__}, optional
+                Alternative way of passing options.
+            """))
+
+    doc_pieces.append(dedent("""\
         memory_pool : pyarrow.MemoryPool, optional
             If not passed, will allocate memory from the default memory pool.
-        """)
+        """))
 
     # 4. Custom addition (e.g. examples)
     if doc_addition is not None:
-        doc_pieces.append("\n{}\n".format(doc_addition.strip("\n")))
+        doc_pieces.append("\n{}\n".format(dedent(doc_addition).strip("\n")))
 
-    wrapper.__doc__ = "".join(dedent(s) for s in doc_pieces)
+    wrapper.__doc__ = "".join(s for s in doc_pieces)

Review comment:
       ```suggestion
       wrapper.__doc__ = "".join(doc_pieces)
   ```
   
   `doc_pieces` is already a list I think? (so now we don't need the `dedent`, also don't need the loop)

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -690,6 +690,26 @@ cdef class _CastOptions(FunctionOptions):
 
 
 class CastOptions(_CastOptions):
+    """
+    Options for the `cast` function.
+
+    Parameters
+    ----------
+    target_type : DataType, optional
+        The PyArrow type to cast to.
+    allow_int_overflow : bool, optional
+        Whether integer overflow is allowed when casting.

Review comment:
       All those options indicate "optional", but I suppose they have an actual default? (all False?)

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -700,30 +702,126 @@ def test_generated_docstrings():
         Parameters
         ----------
         x : Array-like or scalar-like
-            Argument to compute function
+            Argument to compute function.
         y : Array-like or scalar-like
-            Argument to compute function
+            Argument to compute function.
         memory_pool : pyarrow.MemoryPool, optional
             If not passed, will allocate memory from the default memory pool.
         """)
+    # Varargs with options
+    assert pc.min_element_wise.__doc__ == textwrap.dedent("""\
+        Find the element-wise minimum value.
+
+        Nulls are ignored (by default) or propagated.
+        NaN is preferred over null, but not over any valid value.
+
+        Parameters
+        ----------
+        *args : Array-like or scalar-like
+            Argument to compute function.
+        skip_nulls : bool, default True
+            Whether to skip (ignore) nulls in the input.
+            If False, any null in the input forces the output to null.
+        options : pyarrow.compute.ElementWiseAggregateOptions, optional
+            Alternative way of passing options.
+        memory_pool : pyarrow.MemoryPool, optional
+            If not passed, will allocate memory from the default memory pool.
+        """)
+    # Nullary with options
+    assert pc.random.__doc__ == textwrap.dedent("""\
+        Generate numbers in the range [0, 1).
+
+        Generated values are uniformly-distributed, double-precision """ +
+                                                """in range [0, 1).

Review comment:
       ```suggestion
           Generated values are uniformly-distributed, double-precision \
   in range [0, 1).
   ```
   
   This is an alternative way to do this, doesn't need to end/start triple quotes, but because of the strange indentation not necessarily nicer ..

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -950,6 +1132,16 @@ cdef class _FilterOptions(FunctionOptions):
 
 
 class FilterOptions(_FilterOptions):
+    """
+    Options for selecting with a boolean filter.
+
+    Parameters
+    ----------
+    null_selection_behavior : str, default "drop"
+        How to handle nulls in the input.

Review comment:
       ```suggestion
           How to handle nulls in the selection filter.
   ```
   
   (the `filter` docstring uses "input" for the values that are being filtered)

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1326,6 +1781,25 @@ cdef class _QuantileOptions(FunctionOptions):
 
 
 class QuantileOptions(_QuantileOptions):
+    __doc__ = f"""
+    Options for the `quantile` function.
+
+    Parameters
+    ----------
+    q : double or sequence of double, default 0.5
+        Quantiles to compute. All values must be in [0, 1].
+    interpolation : str, default "linear"
+        How to break ties between competing data points for a given quantile.
+        Accepted values are:
+        - "linear": compute an interpolation

Review comment:
       ```suggestion
   
           - "linear": compute an interpolation
   ```
   
   (it's ugly, but I seem to remember this is needed for proper rendering in the online docstring pages -> lists need to be surrounded by empty line in rst, but this is only within the description, so no white line is needed at the end of the description)

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -907,7 +1023,23 @@ cdef class _ReplaceSubstringOptions(FunctionOptions):
 
 
 class ReplaceSubstringOptions(_ReplaceSubstringOptions):
-    def __init__(self, pattern, replacement, *, max_replacements=-1):
+    """
+    Options for replacing matched substrings.
+
+    Parameters
+    ----------
+    pattern : str
+        Substring pattern to look for inside input values.
+    replacement : str
+        What to replace the pattern with.
+    max_replacements : int or None, default None
+        If given, the maximum number of strings to replace in each
+        input value.

Review comment:
       If not given, there is no limit in number of replacements?

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1027,6 +1266,15 @@ cdef class _ScalarAggregateOptions(FunctionOptions):
 
 
 class ScalarAggregateOptions(_ScalarAggregateOptions):
+    __doc__ = f"""
+    Options for scalar aggregations.
+
+    Parameters
+    ----------
+    {_skip_nulls_doc()}

Review comment:
       You mean that they will stay the same for ScalarAggregateOptions vs ElementWiseAggregateOptions vs ... (the different places it is being used)? 
   
   It seems quite unlikely to me that the general description will not be correct anymore for one of those functions, but we can also always then update it if that would change (also if we all inline them duplicated, we need to think about updating the docstring if behaviour in C++ would change ..)

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -907,7 +1023,23 @@ cdef class _ReplaceSubstringOptions(FunctionOptions):
 
 
 class ReplaceSubstringOptions(_ReplaceSubstringOptions):
-    def __init__(self, pattern, replacement, *, max_replacements=-1):
+    """
+    Options for replacing matched substrings.
+
+    Parameters
+    ----------
+    pattern : str
+        Substring pattern to look for inside input values.
+    replacement : str
+        What to replace the pattern with.
+    max_replacements : int or None, default None
+        If given, the maximum number of strings to replace in each
+        input value.

Review comment:
       For SplitOptions, you used "(unlimited if None)"




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org