You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2018/04/24 00:13:43 UTC

madlib git commit: Release 1.14: Update version numbers and support upgrading to v1.14

Repository: madlib
Updated Branches:
  refs/heads/master 0e1161c0d -> 1c81cb102


Release 1.14: Update version numbers and support upgrading to v1.14

Update the version number from 1.14-dev to 1.14 for the release candidate.
Update the changelists (add the 1.13-to-1.14 changelist and a changelist
template; spell out and schema-qualify type names in the 1.12-to-1.13
changelist) and other related files for the upgrade.
Update upgrade_util to ensure PostgreSQL 10 support.
Simplify the _get_existing_uda function, since it is not possible to
define an aggregate without any arguments.
Note that upgrading from versions prior to 1.11 is not supported.

Co-authored-by: Nikhil Kak <nk...@pivotal.io>

Closes #266


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/1c81cb10
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/1c81cb10
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/1c81cb10

Branch: refs/heads/master
Commit: 1c81cb1027decfa5634f8bab902e664c2b720abd
Parents: 0e1161c
Author: Orhan Kislal <ok...@pivotal.io>
Authored: Mon Apr 23 16:59:11 2018 -0700
Committer: Orhan Kislal <ok...@pivotal.io>
Committed: Mon Apr 23 16:59:11 2018 -0700

----------------------------------------------------------------------
 deploy/postflight.sh                  |  2 +-
 doc/mainpage.dox.in                   |  1 +
 pom.xml                               |  2 +-
 src/config/Version.yml                |  2 +-
 src/madpack/changelist_1.12_1.13.yaml | 14 ++---
 src/madpack/changelist_1.13_1.14.yaml | 99 ++++++++++++++++++++++++++++++
 src/madpack/template_changelist.yaml  | 58 +++++++++++++++++
 src/madpack/upgrade_util.py           | 57 ++++++++++-------
 8 files changed, 203 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/deploy/postflight.sh
----------------------------------------------------------------------
diff --git a/deploy/postflight.sh b/deploy/postflight.sh
index 8e108c9..87c6099 100755
--- a/deploy/postflight.sh
+++ b/deploy/postflight.sh
@@ -2,7 +2,7 @@
 
 # $0 - Script Path, $1 - Package Path, $2 - Target Location, and $3 - Target Volume
 
-MADLIB_VERSION=1.14-dev
+MADLIB_VERSION=1.14
 
 find $2/usr/local/madlib/bin -type d -exec cp -RPf {} $2/usr/local/madlib/old_bin \; 2>/dev/null
 find $2/usr/local/madlib/bin -depth -type d -exec rm -r {} \; 2>/dev/null

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index b93f23c..384d21b 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -17,6 +17,7 @@ Useful links:
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-user/">User mailing list</a></li>
 <li><a href="https://mail-archives.apache.org/mod_mbox/madlib-dev/">Dev mailing list</a></li>
 <li>User documentation for earlier releases:
+    <a href="../v1.14/index.html">v1.14</a>,
     <a href="../v1.13/index.html">v1.13</a>,
     <a href="../v1.12/index.html">v1.12</a>,
     <a href="../v1.11/index.html">v1.11</a>,

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index fb8e6aa..5a3ba8f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
 
   <groupId>org.apache.madlib</groupId>
   <artifactId>madlib</artifactId>
-  <version>1.14-dev</version>
+  <version>1.14</version>
   <packaging>pom</packaging>
 
   <build>

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/src/config/Version.yml
----------------------------------------------------------------------
diff --git a/src/config/Version.yml b/src/config/Version.yml
index cc18d2a..2ec314d 100644
--- a/src/config/Version.yml
+++ b/src/config/Version.yml
@@ -1 +1 @@
-version: 1.14-dev
+version: 1.14

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/src/madpack/changelist_1.12_1.13.yaml
----------------------------------------------------------------------
diff --git a/src/madpack/changelist_1.12_1.13.yaml b/src/madpack/changelist_1.12_1.13.yaml
index 5c44c74..0e6c3df 100644
--- a/src/madpack/changelist_1.12_1.13.yaml
+++ b/src/madpack/changelist_1.12_1.13.yaml
@@ -59,16 +59,16 @@ udf:
         argument: character varying, character varying, character varying, character varying
     - __knn_validate_src:
         rettype: integer
-        argument: varchar, varchar, varchar, varchar, varchar, varchar, varchar, varchar, integer
+        argument: character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, integer
     - knn:
-        rettype: varchar
-        argument: varchar, varchar, varchar, varchar, varchar, varchar, varchar, varchar, integer
+        rettype: character varying
+        argument: character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying, integer
     - knn:
-        rettype: varchar
-        argument: varchar, varchar, varchar, varchar, varchar, varchar, varchar, varchar
+        rettype: character varying
+        argument: character varying, character varying, character varying, character varying, character varying, character varying, character varying, character varying
     - knn:
         rettype: void
-        argument: varchar
+        argument: character varying
     - knn:
         rettype: void
 
@@ -77,7 +77,7 @@ udf:
 # Overloaded functions should be mentioned separately
 uda:
     - mlp_igd_step:
-        rettype: mlp_step_result
+        rettype: schema_madlib.mlp_step_result
         argument: double precision[], double precision[], double precision[], double precision[], double precision, integer, integer, double precision, boolean, double precision[], integer, double precision, double precision[], double precision[]
 
 # Casts (UDC) updated/removed

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/src/madpack/changelist_1.13_1.14.yaml
----------------------------------------------------------------------
diff --git a/src/madpack/changelist_1.13_1.14.yaml b/src/madpack/changelist_1.13_1.14.yaml
new file mode 100644
index 0000000..94435fb
--- /dev/null
+++ b/src/madpack/changelist_1.13_1.14.yaml
@@ -0,0 +1,99 @@
+# ------------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ------------------------------------------------------------------------------
+
+# Changelist for MADlib version 1.13 to 1.14
+
+# This file contains all changes that were introduced in a new version of
+# MADlib. This changelist is used by the upgrade script to detect what objects
+# should be upgraded (while retaining all other objects from the previous version)
+
+# New modules (actually .sql_in files) added in upgrade version
+# For these files the sql_in code is retained as is with the functions in the
+# file installed on the upgrade version. All other files (that don't have
+# updates), are cleaned up to remove object replacements
+new module:
+    # ----------------- Changes from 1.13 to 1.14 --------
+    balance_sample:
+    minibatch_preprocessing:
+
+
+# Changes in the types (UDT) including removal and modification
+udt:
+    mlp_step_result:
+    summary_result:
+# List of the UDF changes that affect the user externally. This includes change
+# in function name, return type, argument order or types, or removal of
+# the function. In each case, the original function is as good as removed and a
+# new function is created. In such cases, we should abort the upgrade if there
+# are user views dependent on this function, since the original function will
+# not be present in the upgraded version.
+udf:
+    # ----------------- Changes from 1.13 to 1.14 ----------
+    - __build_tree:
+        rettype: void
+        argument: boolean, text, text, text, text, text, boolean, text, character varying[], character varying[], character varying[], character varying[], text, text, integer, integer, integer, integer, text, smallint, text, integer
+    - internal_predict_mlp:
+        rettype: double precision[]
+        argument: double precision[], double precision[], double precision, double precision, double precision[], integer, double precision[], double precision[]
+    - mlp_igd_transition:
+        rettype: double precision[]
+        argument: double precision[], double precision[], double precision[], double precision[], double precision[], double precision, integer, integer, double precision, boolean, double precision[], double precision
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean, boolean
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean, boolean, double precision[]
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean, boolean, double precision[], integer
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean, boolean, double precision[], integer, boolean
+    - summary:
+        rettype: schema_madlib.summary_result
+        argument: text, text, text, text, boolean, boolean, double precision[], integer, boolean, integer
+
+
+# Changes to aggregates (UDA) including removal and modification
+# Overloaded functions should be mentioned separately
+uda:
+- mlp_igd_step:
+    rettype: double precision[]
+    argument: double precision[], double precision[], double precision[], double precision[], double precision, integer, integer, double precision, boolean, double precision[], double precision
+# Casts (UDC) updated/removed
+udc:
+
+# Operators (UDO) removed/updated
+udo:
+
+# Operator Classes (UDOC) removed/updated
+udoc:

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/src/madpack/template_changelist.yaml
----------------------------------------------------------------------
diff --git a/src/madpack/template_changelist.yaml b/src/madpack/template_changelist.yaml
new file mode 100644
index 0000000..455558f
--- /dev/null
+++ b/src/madpack/template_changelist.yaml
@@ -0,0 +1,58 @@
+# ------------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ------------------------------------------------------------------------------
+
+# Changelist for MADlib version <old> to <new>
+
+# This file contains all changes that were introduced in a new version of
+# MADlib. This changelist is used by the upgrade script to detect what objects
+# should be upgraded (while retaining all other objects from the previous version)
+
+# New modules (actually .sql_in files) added in upgrade version
+# For these files the sql_in code is retained as is with the functions in the
+# file installed on the upgrade version. All other files (that don't have
+# updates), are cleaned up to remove object replacements
+new module:
+    # ----------------- Changes from <old> to <new> --------
+
+
+# Changes in the types (UDT) including removal and modification
+udt:
+
+# List of the UDF changes that affect the user externally. This includes change
+# in function name, return type, argument order or types, or removal of
+# the function. In each case, the original function is as good as removed and a
+# new function is created. In such cases, we should abort the upgrade if there
+# are user views dependent on this function, since the original function will
+# not be present in the upgraded version.
+udf:
+    # ----------------- Changes from <old> to <new> ----------
+
+
+# Changes to aggregates (UDA) including removal and modification
+# Overloaded functions should be mentioned separately
+uda:
+
+# Casts (UDC) updated/removed
+udc:
+
+# Operators (UDO) removed/updated
+udo:
+
+# Operator Classes (UDOC) removed/updated
+udoc:

http://git-wip-us.apache.org/repos/asf/madlib/blob/1c81cb10/src/madpack/upgrade_util.py
----------------------------------------------------------------------
diff --git a/src/madpack/upgrade_util.py b/src/madpack/upgrade_util.py
index 62ae4a6..d38ebc8 100644
--- a/src/madpack/upgrade_util.py
+++ b/src/madpack/upgrade_util.py
@@ -71,7 +71,29 @@ class UpgradeBase:
         pg_catalog.pg_get_function_result in PG for a complete implementation, which are
         not supported by GP
         """
-        row = self._run_sql("""
+
+        # Check if the function has any arguments
+        proargtypes = self._run_sql(
+            """
+            SELECT
+                array_upper(proargtypes,1) as proargtypes
+            FROM pg_proc
+            WHERE oid = {oid}
+            """.format(oid=oid))
+        # If it does not have any arguments then the unnest will not return
+        # any rows. We need a single row with an empty string.
+        unnest_proargtypes = "\'\'::VARCHAR"
+        gen_series_proargtypes = "1"
+        if proargtypes[0]['proargtypes'] != "-1":
+            # Convert the argument types to text
+            unnest_proargtypes = "textin(regtypeout(unnest(proargtypes)::regtype))"
+            gen_series_proargtypes = "generate_series(0, array_upper(proargtypes, 1))"
+
+        # Convert the return type to text. The aggregate (max) is necessary for
+        # the array_to_string aggregate to work. Every row should have the same
+        # proname and rettype.
+        row = self._run_sql(
+            """
             SELECT
                 max(proname) AS proname,
                 max(rettype) AS rettype,
@@ -81,21 +103,14 @@ class UpgradeBase:
                 SELECT
                     proname,
                     textin(regtypeout(prorettype::regtype)) AS rettype,
-                    CASE array_upper(proargtypes,1) WHEN -1 THEN ''
-                        ELSE textin(regtypeout(unnest(proargtypes)::regtype))
-                    END AS argtype,
-                    CASE WHEN proargnames IS NULL THEN ''
-                        ELSE unnest(proargnames)
-                    END AS argname,
-                    CASE array_upper(proargtypes,1) WHEN -1 THEN 1
-                        ELSE generate_series(0, array_upper(proargtypes, 1))
-                    END AS i
+                    {unnest_proargtypes} AS argtype,
+                    {gen_series_proargtypes} AS i
                 FROM
                     pg_proc AS p
                 WHERE
                     oid = {oid}
             ) AS f
-            """.format(oid=oid))
+            """.format(**locals()))
         return {"proname": row[0]['proname'],
                 "rettype": row[0]['rettype'],
                 "argument": row[0]['argument']}
@@ -304,8 +319,9 @@ class ChangeHandler(UpgradeBase):
         res = defaultdict(bool)
         for udf in self._udf:
             for item in self._udf[udf]:
+                udf_arglist = item['argument'] if 'argument' in item else ''
                 signature = get_signature_for_compare(
-                    self._schema, udf, item['rettype'], item['argument'])
+                    self._schema, udf, item['rettype'], udf_arglist)
                 res[signature] = True
         return res
 
@@ -316,8 +332,9 @@ class ChangeHandler(UpgradeBase):
         res = defaultdict(bool)
         for uda in self._uda:
             for item in self._uda[uda]:
+                uda_arglist = item['argument'] if 'argument' in item else ''
                 signature = get_signature_for_compare(
-                    self._schema, uda, item['rettype'], item['argument'])
+                    self._schema, uda, item['rettype'], uda_arglist)
                 res[signature] = True
         return res
 
@@ -1005,8 +1022,10 @@ class ScriptCleaner(UpgradeBase):
 
     def _get_existing_uda(self):
         """
-        @brief Get the existing UDAs in the current version
+        @brief Get the existing UDAs in the current version.
         """
+        # See _get_function_info for explanations.
+
         rows = self._run_sql("""
             SELECT
                 max(proname) AS proname,
@@ -1018,14 +1037,8 @@ class ScriptCleaner(UpgradeBase):
                     p.oid AS procoid,
                     proname,
                     textin(regtypeout(prorettype::regtype)) AS rettype,
-
-                    CASE array_upper(proargtypes,1) WHEN -1 THEN ''
-                        ELSE textin(regtypeout(unnest(proargtypes)::regtype))
-                    END AS argtype,
-
-                    CASE array_upper(proargtypes,1) WHEN -1 THEN 1
-                        ELSE generate_series(0, array_upper(proargtypes, 1))
-                    END AS i
+                    textin(regtypeout(unnest(proargtypes)::regtype)) AS argtype,
+                    generate_series(0, array_upper(proargtypes, 1)) AS i
                 FROM
                     pg_proc AS p,
                     pg_namespace AS nsp