You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@madlib.apache.org by ok...@apache.org on 2022/06/22 20:31:39 UTC

[madlib] branch master updated (ac22af57 -> 49bb11f0)

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


    from ac22af57 XGBoost: Add new module xgboost
     new f37a9292 Utilities: Reuse update plan in GroupIterationController
     new 45f533b2 SVM: Fix dev-check test typo
     new 49bb11f0 SVM: Disable ORCA to reduce planning time cost

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/ports/postgres/modules/svm/svm.py_in           |  5 +-
 src/ports/postgres/modules/svm/test/svm.sql_in     |  4 +-
 .../modules/utilities/in_mem_group_control.py_in   | 68 ++++++++++++----------
 3 files changed, 42 insertions(+), 35 deletions(-)

[madlib] 03/03: SVM: Disable ORCA to reduce planning time cost

Posted by ok...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 49bb11f04ebfb015f8b4f96947e0e8174929c841
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Thu Jun 9 18:46:42 2022 -0400

    SVM: Disable ORCA to reduce planning time cost
---
 src/ports/postgres/modules/svm/svm.py_in | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/ports/postgres/modules/svm/svm.py_in b/src/ports/postgres/modules/svm/svm.py_in
index ceddb392..6106629f 100644
--- a/src/ports/postgres/modules/svm/svm.py_in
+++ b/src/ports/postgres/modules/svm/svm.py_in
@@ -7,6 +7,7 @@ from collections import defaultdict
 from kernel_approximation import create_kernel, load_kernel
 
 from utilities.control import MinWarning
+from utilities.control import OptimizerControl
 from utilities.in_mem_group_control import GroupIterationController
 from utilities.utilities import _assert
 from utilities.utilities import _string_to_array
@@ -815,7 +816,8 @@ def svm(schema_madlib, source_table, model_table,
         if transformer.transformed_table:
             args.update(transformer.transformed_table)
         _cross_validate_svm(args)
-        _svm_parsed_params(use_transformer_for_output=True, **args)
+        with OptimizerControl(False):
+            _svm_parsed_params(use_transformer_for_output=True, **args)
         transformer.clear()
 # ------------------------------------------------------------------------------

[madlib] 01/03: Utilities: Reuse update plan in GroupIterationController

Posted by ok...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit f37a92924d21204a83566f66e2c814ad22b88588
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Thu Jun 9 16:57:33 2022 -0400

    Utilities: Reuse update plan in GroupIterationController
    
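    The group iteration controller prepares a plan and executes it during
    the update phase. For some modules, this plan does not change between
    iterations. With this commit, the prepared plan is cached on the
    controller (update_plan) and reused by later iterations instead of
    being prepared again each time. A module whose update query does change
    between iterations resets update_plan to None to force a new plan; SVM
    does this after updating its stepsize.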
    
    Co-authored-by: Bhuvnesh Chaudhary <bc...@pivotal.io>
---
 src/ports/postgres/modules/svm/svm.py_in           |  1 +
 .../modules/utilities/in_mem_group_control.py_in   | 68 ++++++++++++----------
 2 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/src/ports/postgres/modules/svm/svm.py_in b/src/ports/postgres/modules/svm/svm.py_in
index 1532cb24..ceddb392 100644
--- a/src/ports/postgres/modules/svm/svm.py_in
+++ b/src/ports/postgres/modules/svm/svm.py_in
@@ -62,6 +62,7 @@ def _compute_svm(args):
                 it.kwargs['stepsize'] *= it.kwargs['decay_factor']
             else:
                 it.kwargs['stepsize'] = init_stepsize / (it.iteration + 1)
+            it.update_plan = None
             has_converged = it.test(
                 """
                 {iteration} >= {max_iter}
diff --git a/src/ports/postgres/modules/utilities/in_mem_group_control.py_in b/src/ports/postgres/modules/utilities/in_mem_group_control.py_in
index 68834c04..31f7f374 100644
--- a/src/ports/postgres/modules/utilities/in_mem_group_control.py_in
+++ b/src/ports/postgres/modules/utilities/in_mem_group_control.py_in
@@ -240,6 +240,7 @@ class GroupIterationController:
         self.finished_states = state_factory(self.is_state_type_bytea8)
 
         self.group_param = self._init_group_param()
+        self.update_plan = None
 
     def _init_group_param(self):
         _grp_key = ("array_to_string(ARRAY[{grouping_str}], ',')"
@@ -549,40 +550,43 @@ class GroupIterationController:
         self.iteration = self.iteration + 1
 
         group_param = self.group_param
-        run_sql = """
-            SELECT
-                {_grp_key} AS {col_grp_key},
-                {grouping_col},
-                {iteration} AS {col_grp_iteration},
-                ({newState}) AS {col_grp_state}
-            FROM (
-                SELECT *,
-                    array_to_string(ARRAY[{grouping_str}], ',') AS {col_grp_key}
-                FROM {rel_source}
-            ) AS {as_rel_source}
-            JOIN ( {select_rel_state} ) AS {rel_state}
-            {using_str}
-            JOIN ( {select_n_tuples} ) AS _rel_n_tuples
-            {using_str}
-            {groupby_str}
-            """.format(
-            newState=newState,
-            iteration=self.iteration,
-            using_str=group_param.using_str,
-            groupby_str=group_param.groupby_str,
-            _grp_key=group_param.grp_key,
-            select_rel_state=group_param.select_rel_state,
-            select_n_tuples=group_param.select_n_tuples,
-            **self.kwargs)
 
-        update_plan = plpy.prepare(run_sql,
-                                   ["text[]", group_param.grouped_state_type,
-                                    "text[]", "integer[]"])
-        res_tuples = plpy.execute(update_plan, [self.new_states.keys,
-                                                self.new_states.values,
-                                                self.grp_to_n_tuples.keys(),
-                                                self.grp_to_n_tuples.values()])
+        if self.update_plan is None:
+            run_sql = """
+                SELECT
+                    {_grp_key} AS {col_grp_key},
+                    {grouping_col},
+                    ({newState}) AS {col_grp_state}
+                FROM (
+                    SELECT *,
+                        array_to_string(ARRAY[{grouping_str}], ',') AS {col_grp_key}
+                    FROM {rel_source}
+                ) AS {as_rel_source}
+                JOIN ( {select_rel_state} ) AS {rel_state}
+                {using_str}
+                JOIN ( {select_n_tuples} ) AS _rel_n_tuples
+                {using_str}
+                {groupby_str}
+                """.format(
+                newState=newState,
+                iteration=self.iteration,
+                using_str=group_param.using_str,
+                groupby_str=group_param.groupby_str,
+                _grp_key=group_param.grp_key,
+                select_rel_state=group_param.select_rel_state,
+                select_n_tuples=group_param.select_n_tuples,
+                **self.kwargs)
+
+            self.update_plan = plpy.prepare(run_sql,
+                                       ["text[]", group_param.grouped_state_type,
+                                        "text[]", "integer[]"])
 
+        res_tuples = plpy.execute(self.update_plan,
+                                    [self.new_states.keys,
+                                     self.new_states.values,
+                                     self.grp_to_n_tuples.keys(),
+                                     self.grp_to_n_tuples.values()])
+        res_tuples[0][self.kwargs['col_grp_iteration']] = self.iteration
         col_grp_state = self.kwargs['col_grp_state']
         col_grp_key = self.kwargs['col_grp_key']
         self.old_states.sync_from(self.new_states)

[madlib] 02/03: SVM: Fix dev-check test typo

Posted by ok...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git

commit 45f533b2c79ea27fd101f00da8b2f0b5d2d9946c
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Thu Jun 9 16:59:54 2022 -0400

    SVM: Fix dev-check test typo
---
 src/ports/postgres/modules/svm/test/svm.sql_in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ports/postgres/modules/svm/test/svm.sql_in b/src/ports/postgres/modules/svm/test/svm.sql_in
index 9dea0bb5..a8224014 100644
--- a/src/ports/postgres/modules/svm/test/svm.sql_in
+++ b/src/ports/postgres/modules/svm/test/svm.sql_in
@@ -108,9 +108,9 @@ SELECT svm_regression(
      NULL,
      'init_stepsize=0.01, max_iter=50, lambda=2, norm=l2, epsilon=0.01',
      false);
-SELECT svm_predict('svr_model', 'svr_train_data', 'id', 'svr_test_result');
+SELECT svm_predict('svr_model2', 'svr_train_data', 'id', 'svr_test_result');
 \x on
-SELECT * FROM svr_model;
+SELECT * FROM svr_model2;
 \x off
 SELECT
     assert(