You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2017/06/12 19:00:06 UTC

incubator-madlib git commit: Summary: Update error messaging

Repository: incubator-madlib
Updated Branches:
  refs/heads/master f5915e4cb -> f50f76d78


Summary: Update error messaging


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/f50f76d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/f50f76d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/f50f76d7

Branch: refs/heads/master
Commit: f50f76d78287f1e8173721a07f685d6e5bcfd862
Parents: f5915e4
Author: Rahul Iyer <ri...@apache.org>
Authored: Mon Jun 12 11:59:52 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Mon Jun 12 11:59:52 2017 -0700

----------------------------------------------------------------------
 .../postgres/modules/summary/Summarizer.py_in   | 60 +++++++++-----------
 1 file changed, 28 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/f50f76d7/src/ports/postgres/modules/summary/Summarizer.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/summary/Summarizer.py_in b/src/ports/postgres/modules/summary/Summarizer.py_in
index 3625123..fe4d51a 100644
--- a/src/ports/postgres/modules/summary/Summarizer.py_in
+++ b/src/ports/postgres/modules/summary/Summarizer.py_in
@@ -3,6 +3,7 @@ import math
 
 from utilities.validate_args import input_tbl_valid, output_tbl_valid
 from utilities.validate_args import cols_in_tbl_valid
+from utilities.utilities import _assert
 from utilities.utilities import py_list_to_sql_string
 
 
@@ -21,9 +22,9 @@ class Summarizer:
         self._get_quartiles = get_quartiles
         self._xtileify = xtileify
         self._ntile_array = ntile_array
-        self._how_many_mfv = int(how_many_mfv)
+        self._how_many_mfv = how_many_mfv
         self._get_mfv_quick = get_mfv_quick
-        self._n_cols_per_run = int(n_cols_per_run)
+        self._n_cols_per_run = n_cols_per_run
         self._columns = None
         self._column_names = None
         self._delimiter = '_.*.&.!.!.&.*_'
@@ -63,11 +64,9 @@ class Summarizer:
             Validate the required arguments for the summary function
         """
         # source table
-        if self._source_table is None or self._source_table.strip() == '':
-            plpy.error("Summary error: Invalid data table name!")
+        _assert(self._source_table is not None and self._source_table.strip(),
+                "Summary error: Invalid data table name!")
         input_tbl_valid(self._source_table, "Summary")
-
-        # output table
         output_tbl_valid(self._output_table, "Summary")
 
     def _validate_required_cols(self, required_cols):
@@ -77,22 +76,20 @@ class Summarizer:
 
     def _validate_ntile_array(self):
         if self._ntile_array is not None:
-            if len(self._ntile_array) == 0:
-                plpy.error("""
-                    Summary -- Invalid parameter: ntile_array is empty.""")
+            _assert(len(self._ntile_array) > 0,
+                    "Summary - Invalid parameter: ntile_array is empty.")
             for ntile in self._ntile_array:
-                if ntile < 0 or ntile > 1.0:
-                    plpy.error("""
-                        Summary -- Invalid parameter: Values in ntile_array
-                        should be in the range [0.0, 1.0]""")
+                _assert(0 <= ntile <= 1.0,
+                        "Summary - Invalid parameter: Values in ntile_array "
+                        "should be in the range [0.0, 1.0]")
 
     def _adjust_cols(self):
         # if #cols == 1, then it should not appear in the grouping_cols
-        if len(self._column_names) == 1 and \
-                self._column_names[0] in self._grouping_cols:
+        if (len(self._column_names) == 1 and
+                self._column_names[0] in self._grouping_cols):
             self._grouping_cols.remove(self._column_names[0])
 
-    def _validate_paras(self):
+    def _validate_params(self):
         """
         Validate all parameters in the class
         """
@@ -100,13 +97,14 @@ class Summarizer:
         self._validate_required_cols(self._target_cols)
         self._validate_required_cols(self._grouping_cols)
         self._validate_ntile_array()
-        if self._how_many_mfv is None or self._how_many_mfv <= 0:
-            plpy.error("""
-                Summary -- Invalid parameter: Number of most frequent values
-                required should be positive""")
-        if self._n_cols_per_run is None or self._n_cols_per_run <= 0:
-            plpy.error(" Summary -- Invalid parameter: Number of columns per run"
-                       "should be positive")
+        _assert(self._how_many_mfv is not None and self._how_many_mfv > 0,
+                "Summary - Invalid parameter: Number of most frequent values "
+                "required should be positive")
+        self._how_many_mfv = int(self._how_many_mfv)
+        _assert(self._n_cols_per_run is not None and self._n_cols_per_run > 0,
+                "Summary - Invalid parameter: Number of columns per run "
+                "should be positive")
+        self._n_cols_per_run = int(self._n_cols_per_run)
 
 # ----- End of argument validation functions -----------------------------
 
@@ -354,22 +352,20 @@ class Summarizer:
             ntiles=ntiles)
 
     def run(self):
-        self._validate_paras()
+        self._validate_params()
         self._populate_columns()
-        if not self._columns:
-            plpy.error("Summary error: Invalid column names {0} ".format(self._target_cols))
+        _assert(self._columns,
+                "Summary error: Invalid column names {0} ".format(self._target_cols))
         self._adjust_cols()
         try:
-            plpy.execute('DROP TABLE IF EXISTS {output_table}'.format(
-                output_table=self._output_table))
+            plpy.execute('DROP TABLE IF EXISTS {0}'.format(self._output_table))
             create_table = True
         except Exception:
             plpy.error("Summary error: Invalid output table name " + self._output_table)
 
-        # set a maximum number of columns to avoid out-of-memory
-        #  issues when a lot of columns are computed concurrently
-        #  We repeat the query multiple times till computation is complete for
-        #  all columns.
+        # Set a maximum number of columns to avoid out-of-memory issues when a
+        # lot of columns are computed concurrently. Repeat the query multiple
+        # times till computation is complete for all columns.
         actual_n_cols = len(self._columns)
         # ensuring an even spread of columns in each repeated attempt. For eg.
         #  if self._n_cols_per_run = 15, to simulate 31 cols we break it down as [11, 11, 9]