You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2020/06/04 00:03:30 UTC

[GitHub] [beam] TheNeuralBit commented on a change in pull request #11787: [BEAM-10063] Better emulate the pandas testing environment.

TheNeuralBit commented on a change in pull request #11787:
URL: https://github.com/apache/beam/pull/11787#discussion_r434919303



##########
File path: sdks/python/apache_beam/dataframe/doctests.py
##########
@@ -66,30 +93,19 @@ def __init__(self):
     self._all_frames = {}
 
   def fake_pandas_module(self):
-    class FakePandas(object):
-      """A stand-in for the pandas top-level module.
-      """
-      # For now, only populated with the frame types (below).
-      # TODO(BEAM-9561): We may want to put more here.
-      pass
-
-    fake_pd = FakePandas()
-    for pandas_type, deferred_type in DeferredFrame._pandas_type_map.items():
-      setattr(
-          fake_pd,
-          pandas_type.__name__,
-          self._deferred_frame(pandas_type, deferred_type))
-
-    return fake_pd
-
-  def _deferred_frame(self, pandas_type, deferred_type):
+    return FakePandasObject(pd, self)
+
+  def _deferred_frame(self, pandas_callable):
     """Creates a "constructor" that record the actual value as an input and
     returns a placeholder frame in its place."""
     def wrapper(*args, **kwargs):
-      df = pandas_type(*args, **kwargs)
-      placeholder = expressions.PlaceholderExpression(df[0:0])
-      self._inputs[placeholder] = df
-      return deferred_type(placeholder)
+      df = pandas_callable(*args, **kwargs)
+      if type(df) in DeferredFrame._pandas_type_map.keys():
+        placeholder = expressions.PlaceholderExpression(df[0:0])
+        self._inputs[placeholder] = df
+        return DeferredFrame.wrap(placeholder)
+      else:
+        return df

Review comment:
       It looks like this logic is now inlined in `FakePandasObject.__call__`. We should either remove `_deferred_frame` or call it from `FakePandasObject`. I'm partial to the latter, since the method modifies `_inputs`, but either way is fine.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org