You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2021/01/04 20:49:18 UTC

[madlib] 01/05: Add utilities/debug.py_in

This is an automated email from the ASF dual-hosted git repository.

domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit eab9213a1d698f4029fa54df6152320afdf2a1bc
Author: Domino Valdano <dv...@pivotal.io>
AuthorDate: Thu May 7 23:48:34 2020 +0000

    Add utilities/debug.py_in
    
    Starting a module of debugging functions we can use, to easily
    turn on or off certain kinds of debug messages.
    
    debug.print_timings(force=False) :
       Used in madlib_keras_fit_multiple.py_in to print
       timing information for fit_multiple()
       Breaks down run_training() into _hop_time_,
       _uda_time_, _truncate_time_, _copy_time_, and
       _delete_time_
    
      Enable all by uncommenting:
         debug.timings_enabled = True
       at top of madlib_keras_fit_multiple.py_in
    
      Enable a single call to debug.print_timings(),
         by passing force=True.  Will print even if
         debug.timings_enabled is not set.
    
    debug.print_mst_keys(force=False) :
        print a map between mst_keys and seg_id's as the mst_keys
        hop around from segment to segment... to verify correct
        MOP behavior.
    
    debug.plpy.execute(query)
    
        prints query, EXPLAIN ANALYZE, and timing of a query
        while executing it
---
 src/ports/postgres/modules/utilities/debug.py_in   | 145 +++++++++++++++++++++
 .../postgres/modules/utilities/utilities.py_in     |  21 ---
 2 files changed, 145 insertions(+), 21 deletions(-)

diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
new file mode 100644
index 0000000..ccdceba
--- /dev/null
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -0,0 +1,145 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+    if timings_enabled or force:
+        start_times[msg] = time.time()
+        plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+    if timings_enabled or force:
+        try:
+            start_time = start_times[msg]
+        except:
+            raise Exception(
+                "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+            )
+        current_time = time.time() 
+        plpy_orig.info(
+            '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+                msg,
+                current_time - start_time,
+                time.ctime(current_time),
+                current_time,
+                time.ctime(start_time),
+                start_time
+            )
+        )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+    if not (mst_keys_enabled or force):
+        return
+
+    res = plpy_orig.execute("""
+        SELECT gp_segment_id AS seg_id,
+               {mst_key_col},
+               {dist_key_col}
+        FROM {table} ORDER BY {dist_key_col}
+    """.format(dist_key_col=dist_key_col,
+               table=table,
+               mst_key_col=mst_key_col))
+
+    plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+    if not res:
+        plpy_orig.error("{table} is empty!  Aborting".format(table=table))
+
+    for r in res:
+        seg_id = r['seg_id']
+        mst_key = r['mst_key']
+        dist_key = r[dist_key_col]
+        plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+    """ debug.plpy.execute(sql, ..., force=False)
+
+        Replace plpy.execute(sql, ...) with
+        debug.plpy.execute(sql, ...) to debug
+        a query.  Shows the query itself, the
+        EXPLAIN of it, and how long the query
+        takes to execute.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = force['force']
+
+    plpy = plpy_orig # override global plpy,
+                     # to avoid infinite recursion
+
+    if not (plpy_execute_enabled or force):
+        return plpy.execute(*args, **kwargs)
+
+    if len(args) > 0:
+        sql = args[0]
+    else:
+        raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+
+    if type(sql) == str: # can't print if a PLyPlan object
+        plpy.info(sql)
+
+        # Print EXPLAIN of sql command
+        res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs)
+        for r in res:
+            plpy.info(r['QUERY PLAN'])
+
+    # Run actual sql command, with timing
+    start = time.time()
+    res = plpy.execute(*args, **kwargs)
+
+    # Print how long execution of query took
+    plpy.info("Query took {0}s".format(time.time() - start))
+    if res:
+        plpy.info("Query returned {} row(s)".format(len(res)))
+    else:
+        plpy.info("Query returned 0 rows")
+    return res
+
+plpy_info_enabled = False
+def plpy_info(*args, **kwargs):
+    """ plpy_info(..., force=False)
+
+      plpy.info() if enabled, otherwise do nothing   
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = kwargs['force']
+
+    if plpy_info_enabled or force:
+        plpy_orig.info(*args, **kwargs)
+
+plpy_debug_enabled = False
+def plpy_debug(*args, **kwargs):
+    """ debug.plpy.debug(..., force=False)
+
+        Behaves like plpy.debug() if disabled (printing only
+        if DEBUG level is set high enough), but becomes a
+        plpy.info() if enabled.
+    """
+
+    force = False
+    if 'force' in kwargs:
+        del kwargs['force']
+        force = kwargs['force']
+
+    if plpy_debug_enabled or force:
+        plpy_orig.info(*args, **kwargs)
+    else:
+        plpy_orig.debug(*args, **kwargs)
+
+class plpy:
+    execute = staticmethod(plpy_execute)
+    info = staticmethod(plpy_info)
+    debug = staticmethod(plpy_debug)
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index eb507d9..3cb219a 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -21,29 +21,8 @@ from validate_args import unquote_ident
 from validate_args import drop_tables
 import plpy
 
-
 m4_changequote(`<!', `!>')
 
-def plpy_execute_debug(sql, *args, **kwargs):
-    """ Replace plpy.execute(sql, ...) with
-        plpy_execute_debug(sql, ...) to debug
-        a query.  Shows the query itself, the
-        EXPLAIN of it, and how long the query
-        takes to execute.
-    """
-    plpy.info(sql)  # Print sql command
-
-    # Print EXPLAIN of sql command
-    res = plpy.execute("EXPLAIN " + sql, *args)
-    for r in res:
-        plpy.info(r['QUERY PLAN'])
-
-    # Run actual sql command, with timing
-    start = time.time()
-    plpy.execute(sql, *args)
-
-    # Print how long execution of query took
-    plpy.info("Query took {0}s".format(time.time() - start))
 
 def has_function_properties():
     """ __HAS_FUNCTION_PROPERTIES__ variable defined during configure """