You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2023/04/03 14:06:36 UTC

[madlib] 07/08: Address review comments

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch madlib2-master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 533d5e73f8444009026c9b06908edc69c51af189
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Wed Mar 29 19:12:38 2023 +0300

    Address review comments
---
 src/madpack/configyml.py                                  |  1 -
 src/madpack/madpack.py                                    |  9 ++++++++-
 src/ports/postgres/madpack/SQLCommon.m4_in                | 15 ++++++++++++++-
 .../modules/dbscan/test/unit_tests/test_dbscan.py_in      | 13 +++----------
 .../modules/deep_learning/madlib_keras_predict.py_in      |  2 +-
 src/ports/postgres/modules/glm/multinom.py_in             |  1 -
 src/ports/postgres/modules/linalg/matrix_ops.py_in        |  4 +++-
 .../recursive_partitioning/test/decision_tree.sql_in      |  2 +-
 src/ports/postgres/modules/utilities/debug.py_in          |  3 +++
 9 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/madpack/configyml.py b/src/madpack/configyml.py
index 91143ebd..1251b068 100644
--- a/src/madpack/configyml.py
+++ b/src/madpack/configyml.py
@@ -42,7 +42,6 @@ def get_version(configdir):
         print("configyml : ERROR : missing or malformed Version.yml")
         exit(2)
 
-    # XXX
     conf = convert(conf)
     try:
         conf['version']
diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py
index b2b6dd3e..eb0619de 100755
--- a/src/madpack/madpack.py
+++ b/src/madpack/madpack.py
@@ -25,7 +25,13 @@ from utilities import remove_comments_from_sql
 from utilities import run_query
 
 # Required Python version
-py_min_ver = [2, 6]
+py_min_ver = [2, 7]
+
+if list(sys.version_info[:2]) < py_min_ver:
+    print("ERROR: python version too old ({0}). You need {1} or greater.".
+          format('.'.join(map(str, sys.version_info[:3])),
+                 '.'.join(map(str, py_min_ver))))
+    exit(1)
 
 # raw_input isn't defined in Python3.x, whereas input wasn't behaving like raw_input in Python 2.x
 # this should make both input and raw_input work in Python 2.x/3.x like the raw_input from Python 2.x
@@ -399,6 +405,7 @@ def _plpy_check(py_min_ver):
             error_(this, """Cannot create language plpython3u. Please check if you
                 have configured and installed portid (your platform) with
                 `--with-python` option. Stopping installation...""", False)
+            raise Exception
 
     # Check PL/Python version
     _internal_run_query("DROP FUNCTION IF EXISTS plpy_version_for_madlib();", False)
diff --git a/src/ports/postgres/madpack/SQLCommon.m4_in b/src/ports/postgres/madpack/SQLCommon.m4_in
index 81427b9e..9521d3d2 100644
--- a/src/ports/postgres/madpack/SQLCommon.m4_in
+++ b/src/ports/postgres/madpack/SQLCommon.m4_in
@@ -36,7 +36,20 @@ m4_define(<!WithTracebackForwarding!>, <!
     try:
         $1
     except Exception as e:
-
+/* FIXME:
+* Commenting out the following block since the SD/GD handling is different with python3
+* The effect should be minimal since in the case of an error, the user should start a new session anyway.
+
+        global SD
+        global GD
+
+        for k in SD.keys():
+            del SD[k]
+        del SD
+        for k in GD.keys():
+            del GD[k]
+        del GD
+*/
         etype, _, tb = exc_info()
         detail = ''.join(traceback.format_exception(etype, e, tb))
         message = e.message + 'SegmentTraceback' + detail
diff --git a/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in b/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
index e0acea4f..b1095930 100644
--- a/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
+++ b/src/ports/postgres/modules/dbscan/test/unit_tests/test_dbscan.py_in
@@ -358,19 +358,11 @@ class DBSCANTestCase(unittest.TestCase):
         expected_res = self.build_expected_dbscan_leaf_output(ret_ids, cluster_ids, external_flags,
                                                               core_points)
         # FIXME: This test depends on the order of reading from a dictionary which is not guaranteed in py3
-        # print(res)
-        # print(expected_res)
         # for output_rec in res:
         #     if output_rec.leaf_id == output_rec.dist_id:
-        #         print('internal')
-        #         print(output_rec)
-        #         print(expected_res[output_rec.id]['internal'])
-        #         # self.assertEqual(output_rec, expected_res[output_rec.id]['internal'])
+        #         self.assertEqual(output_rec, expected_res[output_rec.id]['internal'])
         #     else:
-        #         print('external')
-        #         print(output_rec)
-        #         print(expected_res[output_rec.id]['external'].pop())
-        #         # self.assertEqual(output_rec, expected_res[output_rec.id]['external'].pop())
+        #         self.assertEqual(output_rec, expected_res[output_rec.id]['external'].pop())
 
         self.assertEqual(len(res), len(ret_ids),
             "All {} expected rows match rows returned by dbscan_leaf(), but also got {} unexpected rows!".
@@ -394,6 +386,7 @@ class DBSCANTestCase(unittest.TestCase):
         core_points = { 5555, 0, 66, 21, 43, 11 }
         expected_res = self.build_expected_dbscan_leaf_output(ret_ids, cluster_ids, external_flags,
                                                               core_points)
+
         # FIXME: This test depends on the order of reading from a dictionary which is not guaranteed in py3
         # for output_rec in res:
         #     if output_rec.leaf_id == output_rec.dist_id:
diff --git a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
index 45b724e7..99bde223 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_predict.py_in
@@ -167,7 +167,7 @@ class BasePredict():
                             {self.schema_madlib}.internal_keras_predict
                                 ({independent_varname_sql},
                                 $1,
-                                $2,
+                                CASE WHEN {self.test_table}.ctid = min_ctid.ctid THEN $2 ELSE NULL END,
                                 {self.normalizing_const},
                                 {gp_segment_id_col},
                                 ARRAY{seg_ids_test},
diff --git a/src/ports/postgres/modules/glm/multinom.py_in b/src/ports/postgres/modules/glm/multinom.py_in
index 7cf36a6b..b5d47d21 100644
--- a/src/ports/postgres/modules/glm/multinom.py_in
+++ b/src/ports/postgres/modules/glm/multinom.py_in
@@ -121,7 +121,6 @@ def __multinom_validate_args(
 
     if not (isinstance(category_list[0], int) or
             isinstance(category_list[0], float) or
-            isinstance(category_list[0], int) or
             isinstance(category_list[0], str)):
         plpy.error("Multinom error: Given category type is not supported!\n"
                    "Only numeric, character, binary data and enumerated types "
diff --git a/src/ports/postgres/modules/linalg/matrix_ops.py_in b/src/ports/postgres/modules/linalg/matrix_ops.py_in
index fa7487fb..cbb87d10 100644
--- a/src/ports/postgres/modules/linalg/matrix_ops.py_in
+++ b/src/ports/postgres/modules/linalg/matrix_ops.py_in
@@ -67,7 +67,9 @@ def _matrix_column_to_array_format(source_table, row_id, output_table,
             "Not all columns are numeric!")
 
     plpy.execute("""
-        CREATE {temp_str} TABLE {output_table} AS
+        CREATE {temp_str} TABLE {output_table}
+        m4_ifdef(`__POSTGRESQL__', `',
+            `WITH (APPENDONLY=TRUE)') AS
         SELECT
             {row_id} as row_id,
             array[{val_col_names}]::double precision[] AS row_vec
diff --git a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
index 0d5076a3..72a209cc 100644
--- a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
+++ b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
@@ -289,7 +289,7 @@ SELECT tree_train('dt_golf'::text,         -- source table
                          );
 
 SELECT _print_decision_tree(tree) from train_output;
--- TODO: fix displayLeafNode
+-- FIXME: fix displayLeafNode
 -- tree display tests are disabled since they crash the db on mac
 -- SELECT tree_display('train_output', False);
 SELECT impurity_var_importance FROM train_output;
diff --git a/src/ports/postgres/modules/utilities/debug.py_in b/src/ports/postgres/modules/utilities/debug.py_in
index e5a7b52f..9ec33c9c 100644
--- a/src/ports/postgres/modules/utilities/debug.py_in
+++ b/src/ports/postgres/modules/utilities/debug.py_in
@@ -183,6 +183,9 @@ def plpy_execute(*args, **kwargs):
         try:
             res = plpy.execute(*args, **kwargs)
         except plpy.SPIError as e:
+            # FIXME:
+            # The following message parsing doesn't work since python3 exception
+            # implementation is different.
             # msg = e.args[0]
             # if 'SegmentTraceback' in msg:
             #     e.args[0], detail = msg.split('SegmentTraceback')