You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by do...@apache.org on 2021/01/04 20:49:17 UTC
[madlib] branch master updated (cfe28c3 -> 39c2421)
This is an automated email from the ASF dual-hosted git repository.
domino pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git.
from cfe28c3 DL: Remove unused variables and rename is_final_iteration in eval
new eab9213 Add utilities/debug.py_in
new c9e34c3 Fix force option in kwargs
new 9479416 Fix FunctionHandle warning for pg12
new 62ba6c8 Add .idea to .gitignore
new 39c2421 Add debug.plpy.prepare to utilities/debug.py_in
The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.gitignore | 1 +
.../postgres/dbconnector/FunctionHandle_impl.hpp | 2 +-
src/ports/postgres/modules/utilities/debug.py_in | 206 +++++++++++++++++++++
.../postgres/modules/utilities/utilities.py_in | 21 ---
4 files changed, 208 insertions(+), 22 deletions(-)
create mode 100644 src/ports/postgres/modules/utilities/debug.py_in
[madlib] 01/05: Add utilities/debug.py_in
Posted by do...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit eab9213a1d698f4029fa54df6152320afdf2a1bc
Author: Domino Valdano <dv...@pivotal.io>
AuthorDate: Thu May 7 23:48:34 2020 +0000
Add utilities/debug.py_in
Starting a module of debugging functions we can use, to easily
turn on or off certain kinds of debug messages.
debug.start_timing(msg, force=False) and
debug.print_timing(msg, force=False) :
Used in madlib_keras_fit_multiple.py_in to print
timing information for fit_multiple()
Breaks down run_training() into _hop_time_,
_uda_time_, _truncate_time_, _copy_time_, and
_delete_time_
Enable all by uncommenting:
debug.timings_enabled = True
at top of madlib_keras_fit_multiple.py_in
Enable a single timing by passing force=True to both
debug.start_timing() and debug.print_timing(). Will print
even if debug.timings_enabled is not set. (print_timing()
raises if no matching start_timing() was recorded.)
debug.print_mst_keys(table, label, force=False) :
Prints a map between mst_keys and seg_ids as the mst_keys
hop around from segment to segment... to verify correct
MOP behavior.
debug.plpy.execute(query)
prints the query, its EXPLAIN plan, and how long the
query takes to execute
---
src/ports/postgres/modules/utilities/debug.py_in | 145 +++++++++++++++++++++
.../postgres/modules/utilities/utilities.py_in | 21 ---
2 files changed, 145 insertions(+), 21 deletions(-)
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
new file mode 100644
index 0000000..ccdceba
--- /dev/null
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -0,0 +1,145 @@
+import plpy as plpy_orig
+import time
+from deep_learning.madlib_keras_model_selection import ModelSelectionSchema
+from deep_learning.madlib_keras_helper import DISTRIBUTION_KEY_COLNAME
+
+mst_key_col = ModelSelectionSchema.MST_KEY
+dist_key_col = DISTRIBUTION_KEY_COLNAME
+
+start_times = dict()
+timings_enabled = False
+
+def start_timing(msg, force=False):
+ if timings_enabled or force:
+ start_times[msg] = time.time()
+ plpy_orig.info("|_{}_time_HDR|Elapsed (s)|Current|Current (s)|Start|Start (s)|".format(msg))
+
+def print_timing(msg, force=False):
+ if timings_enabled or force:
+ try:
+ start_time = start_times[msg]
+ except:
+ raise Exception(
+ "print_timing({msg}) called with no start_timing({msg})!".format(msg=msg)
+ )
+ current_time = time.time()
+ plpy_orig.info(
+ '|_{0}_time|{1}|{2}|{3}|{4}|{5}'.format(
+ msg,
+ current_time - start_time,
+ time.ctime(current_time),
+ current_time,
+ time.ctime(start_time),
+ start_time
+ )
+ )
+
+mst_keys_enabled = False
+def print_mst_keys(table, label, force=False):
+ if not (mst_keys_enabled or force):
+ return
+
+ res = plpy_orig.execute("""
+ SELECT gp_segment_id AS seg_id,
+ {mst_key_col},
+ {dist_key_col}
+ FROM {table} ORDER BY {dist_key_col}
+ """.format(dist_key_col=dist_key_col,
+ table=table,
+ mst_key_col=mst_key_col))
+
+ plpy_orig.info("|_MST_KEYS_{label}_HDR|mst_key|seg_id|dist_key|table".format(**locals()))
+ if not res:
+ plpy_orig.error("{table} is empty! Aborting".format(table=table))
+
+ for r in res:
+ seg_id = r['seg_id']
+ mst_key = r['mst_key']
+ dist_key = r[dist_key_col]
+ plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+
+plpy_execute_enabled = False
+def plpy_execute(*args, **kwargs):
+ """ debug.plpy.execute(sql, ..., force=False)
+
+ Replace plpy.execute(sql, ...) with
+ debug.plpy.execute(sql, ...) to debug
+ a query. Shows the query itself, the
+ EXPLAIN of it, and how long the query
+ takes to execute.
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = force['force']
+
+ plpy = plpy_orig # override global plpy,
+ # to avoid infinite recursion
+
+ if not (plpy_execute_enabled or force):
+ return plpy.execute(*args, **kwargs)
+
+ if len(args) > 0:
+ sql = args[0]
+ else:
+ raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+
+ if type(sql) == str: # can't print if a PLyPlan object
+ plpy.info(sql)
+
+ # Print EXPLAIN of sql command
+ res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs)
+ for r in res:
+ plpy.info(r['QUERY PLAN'])
+
+ # Run actual sql command, with timing
+ start = time.time()
+ res = plpy.execute(*args, **kwargs)
+
+ # Print how long execution of query took
+ plpy.info("Query took {0}s".format(time.time() - start))
+ if res:
+ plpy.info("Query returned {} row(s)".format(len(res)))
+ else:
+ plpy.info("Query returned 0 rows")
+ return res
+
+plpy_info_enabled = False
+def plpy_info(*args, **kwargs):
+ """ plpy_info(..., force=False)
+
+ plpy.info() if enabled, otherwise do nothing
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = kwargs['force']
+
+ if plpy_info_enabled or force:
+ plpy_orig.info(*args, **kwargs)
+
+plpy_debug_enabled = False
+def plpy_debug(*args, **kwargs):
+ """ debug.plpy.debug(..., force=False)
+
+ Behaves like plpy.debug() if disabled (printing only
+ if DEBUG level is set high enough), but becomes a
+ plpy.info() if enabled.
+ """
+
+ force = False
+ if 'force' in kwargs:
+ del kwargs['force']
+ force = kwargs['force']
+
+ if plpy_debug_enabled or force:
+ plpy_orig.info(*args, **kwargs)
+ else:
+ plpy_orig.debug(*args, **kwargs)
+
+class plpy:
+ execute = staticmethod(plpy_execute)
+ info = staticmethod(plpy_info)
+ debug = staticmethod(plpy_debug)
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index eb507d9..3cb219a 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -21,29 +21,8 @@ from validate_args import unquote_ident
from validate_args import drop_tables
import plpy
-
m4_changequote(`<!', `!>')
-def plpy_execute_debug(sql, *args, **kwargs):
- """ Replace plpy.execute(sql, ...) with
- plpy_execute_debug(sql, ...) to debug
- a query. Shows the query itself, the
- EXPLAIN of it, and how long the query
- takes to execute.
- """
- plpy.info(sql) # Print sql command
-
- # Print EXPLAIN of sql command
- res = plpy.execute("EXPLAIN " + sql, *args)
- for r in res:
- plpy.info(r['QUERY PLAN'])
-
- # Run actual sql command, with timing
- start = time.time()
- plpy.execute(sql, *args)
-
- # Print how long execution of query took
- plpy.info("Query took {0}s".format(time.time() - start))
def has_function_properties():
""" __HAS_FUNCTION_PROPERTIES__ variable defined during configure """
[madlib] 05/05: Add debug.plpy.prepare to utilities/debug.py_in
Posted by do...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 39c242185253ebaf8cf9bdd4c67b897fdaff673f
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Wed Dec 16 18:44:45 2020 -0800
Add debug.plpy.prepare to utilities/debug.py_in
Saves the original sql string, so that the EXPLAIN plan can be built
by debug.plpy.execute(), which only receives an opaque C object (not the sql string).
---
src/ports/postgres/modules/utilities/debug.py_in | 83 ++++++++++++++++++++----
1 file changed, 72 insertions(+), 11 deletions(-)
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
index 7051dd6..5e288be 100644
--- a/src/ports/postgres/modules/utilities/debug.py_in
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -58,12 +58,53 @@ def print_mst_keys(table, label, force=False):
dist_key = r[dist_key_col]
plpy_orig.info("|_MST_KEYS_{label}|{mst_key}|{seg_id}|{dist_key}|{table}".format(**locals()))
+class prep_entry:
+ def __init__(self, sql, args, kwargs):
+ self.sql = sql
+ self.args = args
+ self.kwargs = kwargs
+
+def plpy_prepare(*args, **kwargs):
+ """ debug.plpy.prepare(sql, ..., force=False)
+
+ If you want debug.plpy.execute() to be able
+ to display the query and/or plan for a
+ prepared query, you must call this function
+ (as debug.plpy.prepare() ) in place of
+ regular plpy.prepare(). Otherwise the execute
+ wrapper will not have access to the query string,
+ so you will only get timing info (no plan).
+ """
+ force = False
+ if 'force' in kwargs:
+ force = kwargs['force']
+ del kwargs['force']
+
+ plpy = plpy_orig # override global plpy,
+ # to avoid infinite recursion
+
+ if not (plpy_execute_enabled or force):
+ return plpy.prepare(*args, **kwargs)
+
+ if len(args) < 1:
+ raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
+ elif type(sql) != str:
+ raise TypeError('debug.plpy.prepare() takes a str as its 1st parameter')
+
+ sql = args[0]
+ plpy.info(sql)
+
+ plan = plpy_orig.prepare(*args, **kwargs)
+ prep = prep_entry(sql, args[1:], kwargs)
+ plpy_wrapper.prepared_queries[plan] = prep
+ return plan
+
plpy_execute_enabled = False
def plpy_execute(*args, **kwargs):
- """ debug.plpy.execute(sql, ..., force=False)
+ """ debug.plpy.execute(q, ..., force=False)
- Replace plpy.execute(sql, ...) with
- debug.plpy.execute(sql, ...) to debug
+ Replace plpy.execute(q, ...) with
+ debug.plpy.execute(q, ...) to debug
a query. Shows the query itself, the
EXPLAIN of it, and how long the query
takes to execute.
@@ -81,17 +122,32 @@ def plpy_execute(*args, **kwargs):
return plpy.execute(*args, **kwargs)
if len(args) > 0:
- sql = args[0]
+ q = args[0]
else:
raise TypeError('debug.plpy.execute() takes at least 1 parameter, 0 passed')
- if type(sql) == str: # can't print if a PLyPlan object
- plpy.info(sql)
+ prep = None
+ if type(q) == str:
+ plpy.info(q)
+ sql = q
+ elif repr(type(q)) == "<type 'PLyPlan'>":
+ if q in plpy_wrapper.prepared_queries:
+ prep = plpy_wrapper.prepared_queries[q]
+ sql = prep.sql
+ else:
+ sql = q
+ else:
+ raise TypeError(
+ "First arg of debug.plpy.execute() must be str or <type 'PLyPlan'>, got {}".format(type(q))
+ )
- # Print EXPLAIN of sql command
- res = plpy.execute("EXPLAIN " + sql, *args[1:], **kwargs)
- for r in res:
- plpy.info(r['QUERY PLAN'])
+ # Print EXPLAIN of sql command
+ explain_query = "EXPLAIN" + sql
+ if prep:
+ explain_query = plpy.prepare(explain_query, *prep.args, **prep.kwargs)
+ res = plpy.execute(explain_query, *args[1:], **kwargs)
+ for r in res:
+ plpy.info(r['QUERY PLAN'])
# Run actual sql command, with timing
start = time.time()
@@ -139,7 +195,12 @@ def plpy_debug(*args, **kwargs):
else:
plpy_orig.debug(*args, **kwargs)
-class plpy:
+class plpy_wrapper:
+ prepare = staticmethod(plpy_prepare)
execute = staticmethod(plpy_execute)
info = staticmethod(plpy_info)
debug = staticmethod(plpy_debug)
+
+ prepared_queries = dict()
+
+plpy = plpy_wrapper
[madlib] 02/05: Fix force option in kwargs
Posted by do...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit c9e34c394b445f74de8f2b986fc4f9bd29818fe4
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Wed Oct 7 19:20:04 2020 -0700
Fix force option in kwargs
---
src/ports/postgres/modules/utilities/debug.py_in | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
index ccdceba..7051dd6 100644
--- a/src/ports/postgres/modules/utilities/debug.py_in
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -71,8 +71,8 @@ def plpy_execute(*args, **kwargs):
force = False
if 'force' in kwargs:
+ force = kwargs['force']
del kwargs['force']
- force = force['force']
plpy = plpy_orig # override global plpy,
# to avoid infinite recursion
@@ -114,8 +114,8 @@ def plpy_info(*args, **kwargs):
force = False
if 'force' in kwargs:
- del kwargs['force']
force = kwargs['force']
+ del kwargs['force']
if plpy_info_enabled or force:
plpy_orig.info(*args, **kwargs)
@@ -131,8 +131,8 @@ def plpy_debug(*args, **kwargs):
force = False
if 'force' in kwargs:
- del kwargs['force']
force = kwargs['force']
+ del kwargs['force']
if plpy_debug_enabled or force:
plpy_orig.info(*args, **kwargs)
[madlib] 04/05: Add .idea to .gitignore
Posted by do...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 62ba6c87095007df98cb6596ea7e960ae554af03
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Fri Oct 2 19:22:21 2020 -0700
Add .idea to .gitignore
---
.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index d8adb2a..ad9a296 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,4 @@ auto
eproject.cfg
*.project.vim
.vscode
+.idea
[madlib] 03/05: Fix FunctionHandle warning for pg12
Posted by do...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
domino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 947941604a011a2a69e65592a4bb08ae5bea7db5
Author: Domino Valdano <dv...@vmware.com>
AuthorDate: Wed May 27 19:26:12 2020 -0700
Fix FunctionHandle warning for pg12
---
src/ports/postgres/dbconnector/FunctionHandle_impl.hpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/ports/postgres/dbconnector/FunctionHandle_impl.hpp b/src/ports/postgres/dbconnector/FunctionHandle_impl.hpp
index 5587b25..c5e0863 100644
--- a/src/ports/postgres/dbconnector/FunctionHandle_impl.hpp
+++ b/src/ports/postgres/dbconnector/FunctionHandle_impl.hpp
@@ -159,7 +159,7 @@ FunctionHandle::invoke(AnyType &args) {
funcPtrCallInfo.args[i].value = args[i].getAsDatum(&funcPtrCallInfo,
mFuncInfo->getArgumentType(i));
funcPtrCallInfo.args[i].isnull = args[i].isNull();
- elog(WARNING, "funcPtrCallInfo.args[i].value %d funcPtrCallInfo.args[i].isnull %d", funcPtrCallInfo.args[i].value, funcPtrCallInfo.args[i].isnull);
+ elog(WARNING, "funcPtrCallInfo.args[i].value %lu funcPtrCallInfo.args[i].isnull %d", funcPtrCallInfo.args[i].value, funcPtrCallInfo.args[i].isnull);
}
#else
for (uint16_t i = 0; i < funcPtrCallInfo.nargs; ++i) {