You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2021/01/04 20:49:18 UTC
[madlib] 01/05: Add utilities/debug.py_in
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit eab9213a1d698f4029fa54df6152320afdf2a1bc
Author: Domino Valdano <dv...@pivotal.io>
AuthorDate: Thu May 7 23:48:34 2020 +0000
Add utilities/debug.py_in
Starting a module of debugging functions we can use, to easily
turn on or off certain kinds of debug messages.
debug.print_timings(force=False) :
Used in madlib_keras_fit_multiple.py_in to print
timing information for fit_multiple()
Breaks down run_training() into _hop_time_,
_uda_time_, _truncate_time_, _copy_time_, and
_delete_time_
Enable all by uncommenting:
debug.timings_enabled = True
at top of madlib_keras_fit_multiple.py_in
Enable a single call to debug.print_timings(),
by passing force=True. Will print even if
debug.timings_enabled is not set.
debug.print_mst_keys(force=False) :
print a map between mst_keys and seg_id's as the mst_keys
hop around from segment to segment... to verify correct
MOP behavior.
debug.plpy.execute(query)
prints query, EXPLAIN ANALYZE, and timing of a query
while executing it
---
src/ports/postgres/modules/utilities/debug.py_in | 145 +++++++++++++++++++++
.../postgres/modules/utilities/utilities.py_in | 21 ---
2 files changed, 145 insertions(+), 21 deletions(-)
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
new file mode 100644
index 0000000..ccdceba
--- /dev/null
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -0,0 +1,145 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+ if timings_enabled or force:
+ start_times[msg] = time.time()
+ plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+ if timings_enabled or force:
+ try:
+ start_time = start_times[msg]
+ except:
+ raise Exception(
+ "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+ )
+ current_time = time.time()
+ plpy_orig.info(
+ '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+ msg,
+ current_time - start_time,
+ time.ctime(current_time),
+ current_time,
+ time.ctime(start_time),
+ start_time
+ )
+ )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+ if not (mst_keys_enabled or force):
+ return
+
+ res = plpy_orig.execute("""
+ SELECT gp_segment_id AS seg_id,
+ {mst_key_col},
+ {dist_key_col}
+ FROM {table} ORDER BY {dist_key_col}
+ """.format(dist_key_col=dist_key_col,
+ table=table,
+ mst_key_col=mst_key_col))
+
+ plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+ if not res:
+ plpy_orig.error("{table} is empty! Aborting".format(table=table))
+
+ for r in res:
+ seg_id = r['seg_id']
+ mst_key = r['mst_key']
+ dist_key = r[dist_key_col]
+ plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+ """ debug.plpy.execute(sql, ..., force=False)
+
+ Replace plpy.execute(sql, ...) with
+ debug.plpy.execute(sql, ...) to debug
+ a query. Shows the query itself, the
+ EXPLAIN of it, and how long the query
+ takes to execute.
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = force['force']
+
+ plpy = plpy_orig # override global plpy,
+ # to avoid infinite recursion
+
+ if not (plpy_execute_enabled or force):
+ return plpy.execute(*args, **kwargs)
+
+ if len(args) > 0:
+ sql = args[0]
+ else:
+ raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+
+ if type(sql) == str: # can't print if a PLyPlan object
+ plpy.info(sql)
+
+ # Print EXPLAIN of sql command
+ res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs)
+ for r in res:
+ plpy.info(r['QUERY PLAN'])
+
+ # Run actual sql command, with timing
+ start = time.time()
+ res = plpy.execute(*args, **kwargs)
+
+ # Print how long execution of query took
+ plpy.info("Query took {0}s".format(time.time() - start))
+ if res:
+ plpy.info("Query returned {} row(s)".format(len(res)))
+ else:
+ plpy.info("Query returned 0 rows")
+ return res
+
+plpy_info_enabled = False
+def plpy_info(*args, **kwargs):
+ """ plpy_info(..., force=False)
+
+ plpy.info() if enabled, otherwise do nothing
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = kwargs['force']
+
+ if plpy_info_enabled or force:
+ plpy_orig.info(*args, **kwargs)
+
+plpy_debug_enabled = False
+def plpy_debug(*args, **kwargs):
+ """ debug.plpy.debug(..., force=False)
+
+ Behaves like plpy.debug() if disabled (printing only
+ if DEBUG level is set high enough), but becomes a
+ plpy.info() if enabled.
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = kwargs['force']
+
+ if plpy_debug_enabled or force:
+ plpy_orig.info(*args, **kwargs)
+ else:
+ plpy_orig.debug(*args, **kwargs)
+
+class plpy:
+ execute = staticmethod(plpy_execute)
+ info = staticmethod(plpy_info)
+ debug = staticmethod(plpy_debug)
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index eb507d9..3cb219a 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -21,29 +21,8 @@ from validate_args import unquote_ident
from validate_args import drop_tables
import plpy
-
m4_changequote(`<!', `!>')
-def plpy_execute_debug(sql, *args, **kwargs):
- """ Replace plpy.execute(sql, ...) with
- plpy_execute_debug(sql, ...) to debug
- a query. Shows the query itself, the
- EXPLAIN of it, and how long the query
- takes to execute.
- """
- plpy.info(sql) # Print sql command
-
- # Print EXPLAIN of sql command
- res = plpy.execute("EXPLAIN " + sql, *args)
- for r in res:
- plpy.info(r['QUERY PLAN'])
-
- # Run actual sql command, with timing
- start = time.time()
- plpy.execute(sql, *args)
-
- # Print how long execution of query took
- plpy.info("Query took {0}s".format(time.time() - start))
def has_function_properties():
""" __HAS_FUNCTION_PROPERTIES__ variable defined during configure """