You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2020/10/14 20:57:34 UTC

[GitHub] [arrow] bkietz commented on a change in pull request #8457: ARROW-9164: [C++] Add embedded documentation to compute functions

bkietz commented on a change in pull request #8457:
URL: https://github.com/apache/arrow/pull/8457#discussion_r504891193



##########
File path: python/pyarrow/compute.py
##########
@@ -47,34 +47,60 @@
 import pyarrow as pa
 
 
+def _get_arg_names(func):
+    arg_names = func._doc.arg_names
+    if not arg_names:
+        if func.arity == 1:
+            arg_names = ["arg"]
+        elif func.arity == 2:
+            arg_names = ["left", "right"]
+        else:
+            raise NotImplementedError(
+                "unsupported arity: {}".format(func.arity))

Review comment:
       I think it'd probably be better to keep this as simple as possible:
   ```suggestion
       if not arg_names:
           arg_names = ["arg{}".format(i) for i in range(func.arity)]
   ```

##########
File path: python/pyarrow/compute.py
##########
@@ -93,20 +119,10 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class):
     return wrapper
 
 
-_option_classes = {
-    # TODO: export the option class name from C++ metadata?
-    'cast': CastOptions,
-    'filter': FilterOptions,
-    'index_in': SetLookupOptions,
-    'is_in': SetLookupOptions,
-    'match_substring': MatchSubstringOptions,
-    'min_max': MinMaxOptions,
-    'partition_nth_indices': PartitionNthOptions,
-    'stddev': VarianceOptions,
-    'strptime': StrptimeOptions,
-    'take': TakeOptions,
-    'variance': VarianceOptions,
-}
+def _get_options_class(func):
+    class_name = func._doc.options_class
+    # XXX should we raise if the class isn't exposed in Python?
+    return globals().get(class_name) if class_name else None

Review comment:
       I think we should definitely raise
   ```suggestion
       klass =  globals().get(func._doc.options_class, None)
       if klass is not None:
           return klass
       raise NotImplemented("Accessing options for {} from python".format(func))
   ```

##########
File path: python/pyarrow/compute.py
##########
@@ -130,32 +146,38 @@ def _handle_options(name, option_class, options, kwargs):
     return options
 
 
-def _simple_unary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
-    if option_class is not None:
-        def wrapper(arg, *, options=None, memory_pool=None, **kwargs):
-            options = _handle_options(name, option_class, options, kwargs)
-            return func.call([arg], options, memory_pool)
-    else:
-        def wrapper(arg, *, memory_pool=None):
-            return func.call([arg], None, memory_pool)
-
-    return _decorate_compute_function(wrapper, name, func, option_class)
-
-
-def _simple_binary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
+_wrapper_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, memory_pool=None):
+            return func.call([{args_sig}], None, memory_pool)
+        return {func_name}
+    """)
+
+_wrapper_options_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, options=None, memory_pool=None,
+                        **kwargs):
+            options = _handle_options({func_name!r}, option_class, options,
+                                      kwargs)
+            return func.call([{args_sig}], options, memory_pool)
+        return {func_name}
+    """)

Review comment:
       I guess there isn't a way to accomplish this without `exec()` but these wrappers seem brittle in the extreme. Could we minimize the exec'd code by setting `__wrapped__ = eval('lambda {}: None'.format(args_sig))` to get the arguments right, then set `__name__`, `__qualname__` etc directly?

##########
File path: python/pyarrow/compute.py
##########
@@ -130,32 +146,38 @@ def _handle_options(name, option_class, options, kwargs):
     return options
 
 
-def _simple_unary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
-    if option_class is not None:
-        def wrapper(arg, *, options=None, memory_pool=None, **kwargs):
-            options = _handle_options(name, option_class, options, kwargs)
-            return func.call([arg], options, memory_pool)
-    else:
-        def wrapper(arg, *, memory_pool=None):
-            return func.call([arg], None, memory_pool)
-
-    return _decorate_compute_function(wrapper, name, func, option_class)
-
-
-def _simple_binary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
+_wrapper_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, memory_pool=None):
+            return func.call([{args_sig}], None, memory_pool)
+        return {func_name}
+    """)
+
+_wrapper_options_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, options=None, memory_pool=None,
+                        **kwargs):
+            options = _handle_options({func_name!r}, option_class, options,
+                                      kwargs)
+            return func.call([{args_sig}], options, memory_pool)
+        return {func_name}
+    """)
+
+
+def _wrap_function(name, func):
+    option_class = _get_options_class(func)
+    arg_names = _get_arg_names(func)
+    args_sig = ', '.join(arg_names)
+

Review comment:
       ```suggestion
           if option_class is None:
               def wrapper(*args, *, memory_pool=None):
                   return func.call(args, None, memory_pool)
               wrapped_template = 'lambda {}, *, memory_pool=None: ()'
           else:
               def wrapper(*args, *, options=None, memory_pool=None, **kwargs):
                   options = _handle_options(name, option_class, options, kwargs)
                   return func.call(args, options, memory_pool)
               wrapped_template = 'lambda {}, *, options=None, memory_pool=None, **kwargs: ()'
   
           wrapper.__name__ = name
           wrapper.__qualname__ = name
           wrapper.__wrapped__ = eval(wrapped_template.format(args_sig))
   ```

##########
File path: python/pyarrow/compute.py
##########
@@ -47,34 +47,60 @@
 import pyarrow as pa
 
 
+def _get_arg_names(func):
+    arg_names = func._doc.arg_names
+    if not arg_names:
+        if func.arity == 1:
+            arg_names = ["arg"]
+        elif func.arity == 2:
+            arg_names = ["left", "right"]
+        else:
+            raise NotImplementedError(
+                "unsupported arity: {}".format(func.arity))
+
+    return arg_names
+
+
 def _decorate_compute_function(wrapper, exposed_name, func, option_class):
     wrapper.__arrow_compute_function__ = dict(name=func.name,
                                               arity=func.arity)
     wrapper.__name__ = exposed_name
     wrapper.__qualname__ = exposed_name
 
-    # TODO (ARROW-9164): expose actual docstring from C++
     doc_pieces = []
-    arg_str = "arguments" if func.arity > 1 else "argument"
+
+    cpp_doc = func._doc
+    summary = cpp_doc.summary
+    if not summary:
+        arg_str = "arguments" if func.arity > 1 else "argument"
+        summary = ("Call compute function {!r} with the given {}"
+                   .format(func.name, arg_str))
+
+    description = cpp_doc.description
+    arg_names = _get_arg_names(func)
+
     doc_pieces.append("""\
-        Call compute function {!r} with the given {}.
+        {}.
+
+        """.format(summary))
 
+    if description:
+        doc_pieces.append("{}\n\n".format(description))
+
+    doc_pieces.append("""\
         Parameters
         ----------
-        """.format(func.name, arg_str))
+        """)
 
-    if func.arity == 1:
+    for arg_name in arg_names:
+        if func.kind in ('vector', 'scalar_aggregate'):
+            arg_type = 'Array-like'
+        else:
+            arg_type = 'Array-like or scalar-like'

Review comment:
       I suppose at some point we'll want to expose `KernelSignature` or otherwise examine what inputs the kernels actually accept in order to generate this more rigorously

##########
File path: cpp/src/arrow/compute/function.h
##########
@@ -137,19 +169,23 @@ class ARROW_EXPORT Function {
   /// that default_options() is valid to pass to Execute as options.
   const FunctionOptions* default_options() const { return default_options_; }
 
+  virtual Status Validate() const;
+
  protected:
-  Function(std::string name, Function::Kind kind, const Arity& arity,
+  Function(std::string name, Function::Kind kind, const Arity& arity, FunctionDoc doc,
            const FunctionOptions* default_options)
       : name_(std::move(name)),
         kind_(kind),
         arity_(arity),
+        doc_(std::move(doc)),
         default_options_(default_options) {}
 
   Status CheckArity(int passed_num_args) const;
 
   std::string name_;
   Function::Kind kind_;
   Arity arity_;
+  FunctionDoc doc_;
   const FunctionOptions* default_options_ = NULLPTR;

Review comment:
       Since all `FunctionDoc` instances are statically allocated, we could just refer to them with non-owning pointers:
   ```suggestion
     Function(std::string name, Function::Kind kind, const Arity& arity,
              const FunctionOptions* default_options,
              const FunctionDoc* doc = FunctionDoc::empty())
         : name_(std::move(name)),
           kind_(kind),
           arity_(arity),
           default_options_(default_options),
           doc_(doc) {}
   
     Status CheckArity(int passed_num_args) const;
   
     std::string name_;
     Function::Kind kind_;
     Arity arity_;
     const FunctionOptions* default_options_ = NULLPTR;
     const FunctionDoc* doc_;
   ```

##########
File path: python/pyarrow/compute.py
##########
@@ -130,32 +146,38 @@ def _handle_options(name, option_class, options, kwargs):
     return options
 
 
-def _simple_unary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
-    if option_class is not None:
-        def wrapper(arg, *, options=None, memory_pool=None, **kwargs):
-            options = _handle_options(name, option_class, options, kwargs)
-            return func.call([arg], options, memory_pool)
-    else:
-        def wrapper(arg, *, memory_pool=None):
-            return func.call([arg], None, memory_pool)
-
-    return _decorate_compute_function(wrapper, name, func, option_class)
-
-
-def _simple_binary_function(name):
-    func = get_function(name)
-    option_class = _option_classes.get(name)
-
+_wrapper_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, memory_pool=None):
+            return func.call([{args_sig}], None, memory_pool)
+        return {func_name}
+    """)
+
+_wrapper_options_template = dedent("""\
+    def make_wrapper(func, option_class):
+        def {func_name}({args_sig}, *, options=None, memory_pool=None,
+                        **kwargs):
+            options = _handle_options({func_name!r}, option_class, options,
+                                      kwargs)
+            return func.call([{args_sig}], options, memory_pool)
+        return {func_name}
+    """)

Review comment:
       Maybe we could set the `__signature__` property?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org