You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2023/04/03 14:06:36 UTC
[madlib] 07/08: Address review comments
This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch madlib2-master
in repository https://gitbox.apache.org/repos/asf/madlib.git
commit 533d5e73f8444009026c9b06908edc69c51af189
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Wed Mar 29 19:12:38 2023 +0300
Address review comments
---
src/madpack/configyml.py | 1 -
src/madpack/madpack.py | 9 ++++++++-
src/ports/postgres/madpack/SQLCommon.m4_in | 15 ++++++++++++++-
.../modules/dbscan/test/unit_tests/test_dbscan.py_in | 13 +++----------
.../modules/deep_learning/madlib_keras_predict.py_in | 2 +-
src/ports/postgres/modules/glm/multinom.py_in | 1 -
src/ports/postgres/modules/linalg/matrix_ops.py_in | 4 +++-
.../recursive_partitioning/test/decision_tree.sql_in | 2 +-
src/ports/postgres/modules/utilities/debug.py_in | 3 +++
9 files changed, 33 insertions(+), 17 deletions(-)
diff --git a/src/madpack/configyml.py b/src/madpack/configyml.py
index 91143ebd..1251b068 100644
--- a/src/madpack/configyml.py
+++ b/src/madpack/configyml.py
@@ -42,7 +42,6 @@ def get_version(configdir):
print("configyml : ERROR : missing or malformed Version.yml")
exit(2)
- # XXX
conf = convert(conf)
try:
conf['version']
diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py
index b2b6dd3e..eb0619de 100755
--- a/src/madpack/madpack.py
+++ b/src/madpack/madpack.py
@@ -25,7 +25,13 @@ from utilities import remove_comments_from_sql
from utilities import run_query
# Required Python version
-py_min_ver = [2, 6]
+py_min_ver = [2, 7]
+
+if list(sys.version_info[:2]) < py_min_ver:
+ print("ERROR: python version too old ({0}). You need {1} or greater.".
+ format('.'.join(map(str, sys.version_info[:3])),
+ '.'.join(map(str, py_min_ver))))
+ exit(1)
# raw_input isn't defined in Python3.x, whereas input wasn't behaving like raw_input in Python 2.x
# this should make both input and raw_input work in Python 2.x/3.x like the raw_input from Python 2.x
@@ -399,6 +405,7 @@ def _plpy_check(py_min_ver):
error_(this, """Cannot create language plpython3u. Please check if you
have configured and installed portid (your platform) with
`--with-python` option. Stopping installation...""", False)
+ raise Exception
# Check PL/Python version
_internal_run_query("DROP FUNCTION IF EXISTS plpy_version_for_madlib();", False)
diff --git a/src/ports/postgres/madpack/SQLCommon.m4_in b/src/ports/postgres/madpack/SQLCommon.m4_in
index 81427b9e..9521d3d2 100644
--- a/src/ports/postgres/madpack/SQLCommon.m4_in
+++ b/src/ports/postgres/madpack/SQLCommon.m4_in
@@ -36,7 +36,20 @@ m4_define(<!WithTracebackForwarding!>, <!
try:
$1
except Exception as e:
-
+/* FIXME:
+* Commenting out the following block since the SD/GD handling is different with python3
+* The effect should be minimal since in the case of an error, the user should start a new session anyway.
+
+ global SD
+ global GD
+
+ for k in SD.keys():
+ del SD[k]
+ del SD
+ for k in GD.keys():
+ del GD[k]
+ del GD
+*/
etype, _, tb = exc_info()
detail = ''.join(traceback.format_exception(etype, e, tb))
message = e.message + 'SegmentTraceback' + detail
diff --git a/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in b/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
index e0acea4f..b1095930 100644
--- a/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
+++ b/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
@@ -358,19 +358,11 @@ class DBSCANTestCase(unittest.TestCase):
expected_res = self.build_expected_dbscan_leaf_output(ret_ids, cluster_ids, external_flags,
core_points)
# FIXME: This test depends on the order of reading from a dictionary which is not guaranteed in py3
- # print(res)
- # print(expected_res)
# for output_rec in res:
# if output_rec.leaf_id == output_rec.dist_id:
- # print('internal')
- # print(output_rec)
- # print(expected_res[output_rec.id]['internal'])
- # # self.assertEqual(output_rec, expected_res[output_rec.id]['internal'])
+ # self.assertEqual(output_rec, expected_res[output_rec.id]['internal'])
# else:
- # print('external')
- # print(output_rec)
- # print(expected_res[output_rec.id]['external'].pop())
- # # self.assertEqual(output_rec, expected_res[output_rec.id]['external'].pop())
+ # self.assertEqual(output_rec, expected_res[output_rec.id]['external'].pop())
self.assertEqual(len(res), len(ret_ids),
"All {} expected rows match rows returned by dbscan_leaf(), but also got {} unexpected rows!".
@@ -394,6 +386,7 @@ class DBSCANTestCase(unittest.TestCase):
core_points = { 5555, 0, 66, 21, 43, 11 }
expected_res = self.build_expected_dbscan_leaf_output(ret_ids, cluster_ids, external_flags,
core_points)
+
# FIXME: This test depends on the order of reading from a dictionary which is not guaranteed in py3
# for output_rec in res:
# if output_rec.leaf_id == output_rec.dist_id:
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 45b724e7..99bde223 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -167,7 +167,7 @@ class BasePredict():
{self.schema_madlib}.internal_keras_predict
({independent_varname_sql},
$1,
- $2,
+ CASE WHEN {self.test_table}.ctid = min_ctid.ctid THEN $2 ELSE NULL END,
{self.normalizing_const},
{gp_segment_id_col},
ARRAY{seg_ids_test},
diff --git a/src/ports/postgres/modules/glm/multinom.py_in b/src/ports/postgres/modules/glm/multinom.py_in
index 7cf36a6b..b5d47d21 100644
--- a/src/ports/postgres/modules/glm/multinom.py_in
+++ b/src/ports/postgres/modules/glm/multinom.py_in
@@ -121,7 +121,6 @@ def __multinom_validate_args(
if not (isinstance(category_list[0], int) or
isinstance(category_list[0], float) or
- isinstance(category_list[0], int) or
isinstance(category_list[0], str)):
plpy.error("Multinom error: Given category type is not supported!\n"
"Only numeric, character, binary data and enumerated types "
diff --git a/src/ports/postgres/modules/linalg/matrix_ops.py_in b/src/ports/postgres/modules/linalg/matrix_ops.py_in
index fa7487fb..cbb87d10 100644
--- a/src/ports/postgres/modules/linalg/matrix_ops.py_in
+++ b/src/ports/postgres/modules/linalg/matrix_ops.py_in
@@ -67,7 +67,9 @@ def _matrix_column_to_array_format(source_table, row_id, output_table,
"Not all columns are numeric!")
plpy.execute("""
- CREATE {temp_str} TABLE {output_table} AS
+ CREATE {temp_str} TABLE {output_table}
+ m4_ifdef(`__POSTGRESQL__', `',
+ `WITH (APPENDONLY=TRUE)') AS
SELECT
{row_id} as row_id,
array[{val_col_names}]::double precision[] AS row_vec
diff --git a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
index 0d5076a3..72a209cc 100644
--- a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
+++ b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
@@ -289,7 +289,7 @@ SELECT tree_train('dt_golf'::text, -- source table
);
SELECT _print_decision_tree(tree) from train_output;
--- TODO: fix displayLeafNode
+-- FIXME: fix displayLeafNode
-- tree display tests are disabled since they crash the db on mac
-- SELECT tree_display('train_output', False);
SELECT impurity_var_importance FROM train_output;
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
index e5a7b52f..9ec33c9c 100644
--- a/src/ports/postgres/modules/utilities/debug.py_in
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -183,6 +183,9 @@ def plpy_execute(*args, **kwargs):
try:
res = plpy.execute(*args, **kwargs)
except plpy.SPIError as e:
+ # FIXME:
+ # The following message parsing doesn't work since python3 exception
+ # implementation is different.
# msg = e.args[0]
# if 'SegmentTraceback' in msg:
# e.args[0], detail = msg.split('SegmentTraceback')