You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by nj...@apache.org on 2018/06/05 00:02:19 UTC
madlib git commit: Madpack: Make install, reinstall and upgrade atomic
Repository: madlib
Updated Branches:
refs/heads/master abef95ec9 -> 8e34f68d7
Madpack: Make install, reinstall and upgrade atomic
JIRA: MADLIB-1242
We now write all the necessary sql for MADlib installation into one file,
and run it once in a single session. The database's rollback will be useful
to bring it back to original state in case of a failure during install,
reinstall, uninstall, and upgrade.
Closes #271
Co-authored-by: Rahul Iyer <ri...@apache.org>
Co-authored-by: Orhan Kislal <ok...@pivotal.io>
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/8e34f68d
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/8e34f68d
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/8e34f68d
Branch: refs/heads/master
Commit: 8e34f68d7f7b902ebe511b934683724dc2d5dcca
Parents: abef95e
Author: Nandish Jayaram <nj...@apache.org>
Authored: Fri May 18 16:13:28 2018 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Mon Jun 4 16:45:57 2018 -0700
----------------------------------------------------------------------
src/madpack/madpack.py | 950 ++++++++++++++++++++-------------------
src/madpack/upgrade_util.py | 92 ++--
src/madpack/utilities.py | 27 +-
3 files changed, 552 insertions(+), 517 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/8e34f68d/src/madpack/madpack.py
----------------------------------------------------------------------
diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py
index c0d007d..35e98db 100755
--- a/src/madpack/madpack.py
+++ b/src/madpack/madpack.py
@@ -14,14 +14,15 @@ import tempfile
import shutil
import upgrade_util as uu
+from utilities import _write_to_file
from utilities import error_
+from utilities import get_dbver
+from utilities import get_db_madlib_version
+from utilities import get_rev_num
from utilities import info_
from utilities import is_rev_gte
-from utilities import get_rev_num
+from utilities import remove_comments_from_sql
from utilities import run_query
-from utilities import get_madlib_dbrev
-from utilities import get_dbver
-
# Required Python version
py_min_ver = [2, 6]
@@ -53,7 +54,7 @@ maddir_lib = maddir + "/lib/libmadlib.so" # C/C++ libraries
# Read the config files
ports = configyml.get_ports(maddir_conf) # object made of Ports.yml
-rev = configyml.get_version(maddir_conf) # MADlib OS-level version
+new_madlib_ver = configyml.get_version(maddir_conf) # MADlib OS-level version
portid_list = []
for port in ports:
portid_list.append(port)
@@ -95,7 +96,6 @@ def _internal_run_query(sql, show_error):
return run_query(sql, con_args, show_error)
# ------------------------------------------------------------------------------
-
def _get_relative_maddir(maddir, port):
""" Return a relative path version of maddir
@@ -131,35 +131,38 @@ def _get_relative_maddir(maddir, port):
return maddir
# ------------------------------------------------------------------------------
-
-def _run_sql_file(schema, maddir_mod_py, module, sqlfile,
- tmpfile, logfile, pre_sql, upgrade=False,
- sc=None):
+def _cleanup_comments_in_sqlfile(output_filename, upgrade):
"""
- Run SQL file
- @param schema name of the target schema
- @param maddir_mod_py name of the module dir with Python code
- @param module name of the module
- @param sqlfile name of the file to parse
- @param tmpfile name of the temp file to run
- @param logfile name of the log file (stdout)
- @param pre_sql optional SQL to run before executing the file
- @param upgrade are we upgrading as part of this sql run
- @param sc object of ScriptCleaner
+ @brief Remove comments in the sql script, and re-write the file with the
+ cleaned up script.
+ """
+ if not upgrade:
+ with open(output_filename, 'r+') as output_filehandle:
+ full_sql = output_filehandle.read()
+ full_sql = remove_comments_from_sql(full_sql)
+ # Re-write the cleaned-up sql to a new file. Python does not let us
+ # erase all the content of a file and rewrite the same file again.
+ cleaned_output_filename = output_filename+'.tmp'
+ with open(cleaned_output_filename, 'w') as output_filehandle:
+ _write_to_file(output_filehandle, full_sql)
+ # Move the cleaned output file to the old one.
+ os.rename(cleaned_output_filename, output_filename)
+
+def _run_m4_and_append(schema, maddir_mod_py, module, sqlfile,
+ output_filehandle, pre_sql=None):
+ """
+ Function to process a sql file with M4.
"""
-
# Check if the SQL file exists
if not os.path.isfile(sqlfile):
error_(this, "Missing module SQL file (%s)" % sqlfile, False)
- raise ValueError("Missing module SQL file (%s)" % sqlfile)
+ raise ValueError
# Prepare the file using M4
try:
- f = open(tmpfile, 'w')
# Add the before SQL
if pre_sql:
- f.writelines([pre_sql, '\n\n'])
- f.flush()
+ output_filehandle.writelines([pre_sql, '\n\n'])
# Find the madpack dir (platform specific or generic)
if os.path.isdir(maddir + "/ports/" + portid + "/" + dbver + "/madpack"):
maddir_madpack = maddir + "/ports/" + portid + "/" + dbver + "/madpack"
@@ -178,24 +181,33 @@ def _run_sql_file(schema, maddir_mod_py, module, sqlfile,
sqlfile]
info_(this, "> ... parsing: " + " ".join(m4args), verbose)
-
- subprocess.call(m4args, stdout=f)
- f.close()
+ output_filehandle.flush()
+ subprocess.call(m4args, stdout=output_filehandle)
except:
error_(this, "Failed executing m4 on %s" % sqlfile, False)
raise Exception
+def _run_install_check_sql(schema, maddir_mod_py, module, sqlfile,
+ tmpfile, logfile, pre_sql):
+ """
+ Run SQL file
+ @param schema name of the target schema
+ @param maddir_mod_py name of the module dir with Python code
+ @param module name of the module
+ @param sqlfile name of the file to parse
+ @param tmpfile name of the temp file to run
+ @param logfile name of the log file (stdout)
+ @param pre_sql optional SQL to run before executing the file
+ """
+ try:
+ f = open(tmpfile, 'w')
+ _run_m4_and_append(schema, maddir_mod_py, module, sqlfile, f, pre_sql)
+ f.close()
+ except:
+ error_(this, "Failed to temp m4 processed file %s." % tmpfile, False)
+ raise Exception
# Only update function definition
sub_module = ''
- if upgrade:
- # get filename from complete path without the extension
- sub_module = os.path.splitext(os.path.basename(sqlfile))[0]
- info_(this, sub_module, verbose)
- if sub_module not in sc.get_change_handler().newmodule:
- sql = open(tmpfile).read()
- sql = sc.cleanup(sql)
- open(tmpfile, 'w').write(sql)
-
# Run the SQL using DB command-line utility
if portid in SUPPORTED_PORTS:
sqlcmd = 'psql'
@@ -238,6 +250,88 @@ def _run_sql_file(schema, maddir_mod_py, module, sqlfile,
return retval
# ------------------------------------------------------------------------------
+def _run_sql_file(schema, sqlfile):
+ """
+ Run SQL file
+ @param schema name of the target schema
+ @param sqlfile name of the file to parse
+ """
+ # Run the SQL using DB command-line utility
+ if portid in SUPPORTED_PORTS:
+ sqlcmd = 'psql'
+ # Test the DB cmd line utility
+ std, err = subprocess.Popen(['which', sqlcmd], stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE).communicate()
+ if not std:
+ error_(this, "Command not found: %s" % sqlcmd, True)
+
+ runcmd = [sqlcmd, '-a',
+ '-v', 'ON_ERROR_STOP=1',
+ '-h', con_args['host'].split(':')[0],
+ '-p', con_args['host'].split(':')[1],
+ '-d', con_args['database'],
+ '-U', con_args['user'],
+ '--no-password',
+ '--single-transaction',
+ '-f', sqlfile]
+ runenv = os.environ
+ if 'password' in con_args:
+ runenv["PGPASSWORD"] = con_args['password']
+ runenv["PGOPTIONS"] = '-c client_min_messages=notice'
+
+ # Open log file
+ logfile = sqlfile + '.log'
+ try:
+ log = open(logfile, 'w')
+ except:
+ error_(this, "Cannot create log file: %s" % logfile, False)
+ raise Exception
+
+ # Run the SQL
+ try:
+ info_(this, "> ... executing " + sqlfile, verbose)
+ info_(this, ' '.join(runcmd), verbose)
+ retval = subprocess.call(runcmd, env=runenv, stdout=log, stderr=log)
+ except:
+ error_(this, "Failed executing %s" % sqlfile, False)
+ raise Exception
+ finally:
+ log.close()
+ # Check the exit status
+ result = _parse_result_logfile(retval, logfile, sqlfile)
+ return result
+# ------------------------------------------------------------------------------
+
+def _parse_result_logfile(retval, logfile, sql_abspath,
+ sql_filename=None, module=None, milliseconds=None):
+ """
+ Function to parse the logfile and return if its content indicate a failure
+ or success.
+ """
+ is_install_check_logfile = bool(sql_filename and module)
+ # Check the exit status
+ if retval != 0:
+ result = 'FAIL'
+ global keeplogs
+ keeplogs = True
+ # Since every single statement in the test file gets logged,
+ # an empty log file indicates an empty or a failed test
+ elif os.path.isfile(logfile) and os.path.getsize(logfile) > 0:
+ result = 'PASS'
+ # Otherwise
+ else:
+ result = 'ERROR'
+
+ if is_install_check_logfile:
+ # Output result
+ print "TEST CASE RESULT|Module: " + module + \
+ "|" + os.path.basename(sql_filename) + "|" + result + \
+ "|Time: %d milliseconds" % (milliseconds)
+
+ if result == 'FAIL':
+ info_(this, "Failed executing %s" % sql_abspath, True)
+ info_(this, "Check the log at %s" % logfile, True)
+ return result
def _check_db_port(portid):
"""
@@ -259,22 +353,22 @@ def _check_db_port(portid):
# ------------------------------------------------------------------------------
-def _print_revs(rev, dbrev, con_args, schema):
+def _print_vers(new_madlib_ver, db_madlib_ver, con_args, schema):
"""
Print version information
- @param rev OS-level MADlib version
- @param dbrev DB-level MADlib version
+ @param new_madlib_ver OS-level MADlib version
+ @param db_madlib_ver DB-level MADlib version
@param con_args database connection arguments
@param schema MADlib schema name
"""
- info_(this, "MADlib tools version = %s (%s)" % (str(rev), sys.argv[0]), True)
+ info_(this, "MADlib tools version = %s (%s)" % (str(new_madlib_ver), sys.argv[0]), True)
if con_args:
try:
info_(this, "MADlib database version = %s (host=%s, db=%s, schema=%s)"
- % (dbrev, con_args['host'], con_args['database'], schema), True)
+ % (db_madlib_ver, con_args['host'], con_args['database'], schema), True)
except:
info_(this, "MADlib database version = [Unknown] (host=%s, db=%s, schema=%s)"
- % (dbrev, con_args['host'], con_args['database'], schema), True)
+ % (db_madlib_ver, con_args['host'], con_args['database'], schema), True)
return
# ------------------------------------------------------------------------------
@@ -329,112 +423,49 @@ def _plpy_check(py_min_ver):
# ------------------------------------------------------------------------------
-def _db_install(schema, dbrev, testcase):
+def _db_install(schema, is_schema_in_db, filehandle, testcase):
"""
Install MADlib
@param schema MADlib schema name
- @param dbrev DB-level MADlib version
+ @param is_schema_in_db flag to indicate if schema is already present
+ @param filehandle file that contains the sql for installation
@param testcase command-line args for a subset of modules
"""
- info_(this, "Installing MADlib into %s schema..." % schema.upper(), True)
-
- temp_schema = schema + '_v' + ''.join(map(str, get_rev_num(dbrev)))
- # Check the status of MADlib objects in database
- madlib_exists = False if dbrev is None else True
-
- # Test if schema is writable
+ # Create MADlib objects
try:
- _internal_run_query("CREATE TABLE %s.__madlib_test_table (A INT);" % schema, False)
- _internal_run_query("DROP TABLE %s.__madlib_test_table;" % schema, False)
- schema_writable = True
+ _db_create_schema(schema, is_schema_in_db, filehandle)
+ _db_create_objects(schema, filehandle, testcase=testcase)
except:
- schema_writable = False
- # CASE #1: Target schema exists with MADlib objects:
- if schema_writable and madlib_exists:
- schema_overwrite_msg = (
- "***************************************************************************"
- "* Schema {0} already exists"
- "* Installer will rename it to {1}"
- "***************************************************************************"
- "Would you like to continue? [Y/N]".
- format(schema.upper(), temp_schema.upper()))
- info_(this, schema_overwrite_msg)
- go = raw_input('>>> ').upper()
- while go not in ('Y', 'YES', 'N', 'NO'):
- go = raw_input('Yes or No >>> ').upper()
- if go in ('N', 'NO'):
- info_(this, 'Installation stopped.', True)
- return
-
- # Rename MADlib schema
- _db_rename_schema(schema, temp_schema)
-
- # Create MADlib schema
- try:
- _db_create_schema(schema)
- except:
- _db_rollback(schema, temp_schema)
-
- # Create MADlib objects
- try:
- _db_create_objects(schema, temp_schema, testcase=testcase)
- except:
- _db_rollback(schema, temp_schema)
-
- # CASE #2: Target schema exists w/o MADlib objects:
- elif schema_writable and not madlib_exists:
- # Create MADlib objects
- try:
- _db_create_objects(schema, None, testcase=testcase)
- except:
- error_(this, "Building database objects failed. "
- "Before retrying: drop %s schema OR install MADlib into "
- "a different schema." % schema.upper(), True)
-
- #
- # CASE #3: Target schema does not exist:
- #
- elif not schema_writable:
- info_(this, "> Schema %s does not exist" % schema.upper(), verbose)
-
- # Create MADlib schema
- try:
- _db_create_schema(schema)
- except:
- _db_rollback(schema, None)
+ error_(this, "Building database objects failed. "
+ "Before retrying: drop %s schema OR install MADlib into "
+ "a different schema." % schema, True)
- # Create MADlib objects
- try:
- _db_create_objects(schema, None, testcase=testcase)
- except:
- _db_rollback(schema, None)
-
- info_(this, "MADlib %s installed successfully in %s schema." % (str(rev), schema.upper()))
# ------------------------------------------------------------------------------
-def _db_upgrade(schema, dbrev):
+def _db_upgrade(schema, filehandle, db_madlib_ver):
"""
Upgrade MADlib
@param schema MADlib schema name
- @param dbrev DB-level MADlib version
+ @param filehandle Handle to output file
+ @param db_madlib_ver DB-level MADlib version
"""
- if is_rev_gte(get_rev_num(dbrev), get_rev_num(rev)):
+ if is_rev_gte(get_rev_num(db_madlib_ver), get_rev_num(new_madlib_ver)):
info_(this, "Current MADlib version already up to date.", True)
- return
+ return 1
- if is_rev_gte(get_rev_num('1.9.1'), get_rev_num(dbrev)):
+ if is_rev_gte(get_rev_num('1.9.1'), get_rev_num(db_madlib_ver)):
error_(this, """
MADlib versions prior to v1.10 are not supported for upgrade.
Please try upgrading to v1.10 and then upgrade to this version.
""", True)
- return
+ return 1
- info_(this, "Upgrading MADlib into %s schema..." % schema.upper(), True)
+ info_(this, "Upgrading MADlib into %s schema..." % schema, True)
info_(this, "\tDetecting dependencies...", True)
info_(this, "\tLoading change list...", True)
- ch = uu.ChangeHandler(schema, portid, con_args, maddir, dbrev)
+ ch = uu.ChangeHandler(schema, portid, con_args, maddir, db_madlib_ver, filehandle)
info_(this, "\tDetecting table dependencies...", True)
td = uu.TableDependency(schema, portid, con_args)
@@ -537,9 +568,8 @@ def _db_upgrade(schema, dbrev):
ch.drop_changed_udf()
ch.drop_changed_udt() # assume dependent udf for udt does not change
ch.drop_traininginfo_4dt() # used types: oid, text, integer, float
- _db_create_objects(schema, None, True, sc)
-
- info_(this, "MADlib %s upgraded successfully in %s schema." % (str(rev), schema.upper()), True)
+ _db_create_objects(schema, filehandle, True, sc)
+ return 0
# ------------------------------------------------------------------------------
@@ -550,7 +580,7 @@ def _db_rename_schema(from_schema, to_schema):
@param to_schema new name for the schema
"""
- info_(this, "> Renaming schema %s to %s" % (from_schema.upper(), to_schema.upper()), True)
+ info_(this, "> Renaming schema %s to %s" % (from_schema, to_schema), True)
try:
_internal_run_query("ALTER SCHEMA %s RENAME TO %s;" % (from_schema, to_schema), True)
except:
@@ -559,71 +589,59 @@ def _db_rename_schema(from_schema, to_schema):
# ------------------------------------------------------------------------------
-def _db_create_schema(schema):
+def _db_create_schema(schema, is_schema_in_db, filehandle):
"""
Create schema
@param from_schema name of the schema to rename
+ @param is_schema_in_db flag to indicate if schema is already present
@param to_schema new name for the schema
"""
- info_(this, "> Creating %s schema" % schema.upper(), True)
- try:
- _internal_run_query("CREATE SCHEMA %s;" % schema, True)
- except:
- info_(this, 'Cannot create new schema. Rolling back installation...', True)
- pass
+ if not is_schema_in_db:
+ _write_to_file(filehandle, "CREATE SCHEMA %s;" % schema)
# ------------------------------------------------------------------------------
-
-def _db_create_objects(schema, old_schema, upgrade=False, sc=None, testcase=""):
+def _db_create_objects(schema, create_obj_handle,
+ upgrade=False, sc=None, testcase=""):
"""
Create MADlib DB objects in the schema
@param schema Name of the target schema
+ @param create_obj_handle file handle for sql output file
+ @param upgrade flag to indicate if it's an upgrade operation or not
@param sc ScriptCleaner object
@param testcase Command-line args for modules to install
"""
if not upgrade:
# Create MigrationHistory table
try:
- info_(this, "> Creating %s.MigrationHistory table" % schema.upper(), True)
- _internal_run_query("DROP TABLE IF EXISTS %s.migrationhistory;" % schema, True)
+ _write_to_file(create_obj_handle,
+ "DROP TABLE IF EXISTS %s.migrationhistory;" % schema)
sql = """CREATE TABLE %s.migrationhistory
(id serial, version varchar(255),
- applied timestamp default current_timestamp);""" % schema
- _internal_run_query(sql, True)
+ applied timestamp default current_timestamp);
+ """ % schema
+ _write_to_file(create_obj_handle, sql)
except:
error_(this, "Cannot crate MigrationHistory table", False)
raise Exception
- # Copy MigrationHistory table for record keeping purposes
- if old_schema:
- try:
- info_(this, "> Saving data from %s.MigrationHistory table" % old_schema.upper(), True)
- sql = """INSERT INTO %s.migrationhistory (version, applied)
- SELECT version, applied FROM %s.migrationhistory
- ORDER BY id;""" % (schema, old_schema)
- _internal_run_query(sql, True)
- except:
- error_(this, "Cannot copy MigrationHistory table", False)
- raise Exception
-
# Stamp the DB installation
try:
- info_(this, "> Writing version info in MigrationHistory table", True)
- _internal_run_query("INSERT INTO %s.migrationhistory(version) "
- "VALUES('%s')" % (schema, str(rev)), True)
+ _write_to_file(create_obj_handle,
+ """INSERT INTO %s.migrationhistory(version)
+ VALUES('%s');
+ """ % (schema, str(new_madlib_ver)))
except:
error_(this, "Cannot insert data into %s.migrationhistory table" % schema, False)
raise Exception
# Run migration SQLs
- if upgrade:
- info_(this, "> Creating/Updating objects for modules:", True)
- else:
- info_(this, "> Creating objects for modules:", True)
+ info_(this, "> Preparing objects for the following modules:", True)
- caseset = (set([test.strip() for test in testcase.split(',')])
- if testcase != "" else set())
+ if testcase:
+ caseset = set([test.strip() for test in testcase.split(',')])
+ else:
+ caseset = set()
modset = {}
for case in caseset:
@@ -684,47 +702,19 @@ def _db_create_objects(schema, old_schema, upgrade=False, sc=None, testcase=""):
and algoname not in modset[module]:
continue
- # Set file names
- tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
- logfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.log'
- retval = _run_sql_file(schema, maddir_mod_py, module, sqlfile,
- tmpfile, logfile, None, upgrade,
- sc)
- # Check the exit status
- if retval != 0:
- error_(this, "TEST CASE RESULTed executing %s" % tmpfile, False)
- error_(this, "Check the log at %s" % logfile, False)
- raise Exception
-# ------------------------------------------------------------------------------
-
-
-def _db_rollback(drop_schema, keep_schema):
- """
- Rollback installation
- @param drop_schema name of the schema to drop
- @param keep_schema name of the schema to rename and keep
- """
- info_(this, "Rolling back the installation...", True)
-
- if not drop_schema:
- error_(this, 'No schema name to drop. Stopping rollback...', True)
-
- # Drop the current schema
- info_(this, "> Dropping schema %s" % drop_schema.upper(), verbose)
- try:
- _internal_run_query("DROP SCHEMA %s CASCADE;" % (drop_schema), True)
- except:
- error_(this, "Cannot drop schema %s. Stopping rollback..." % drop_schema.upper(), True)
-
- # Rename old to current schema
- if keep_schema:
- _db_rename_schema(keep_schema, drop_schema)
+ if not upgrade:
+ _run_m4_and_append(schema, maddir_mod_py, module, sqlfile,
+ create_obj_handle, None)
+ else:
+ tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
+ with open(tmpfile, 'w+') as tmphandle:
+ _run_m4_and_append(schema, maddir_mod_py, module, sqlfile,
+ tmphandle, None)
+ processed_sql = sc.cleanup(open(tmpfile).read(), algoname)
+ _write_to_file(create_obj_handle, processed_sql)
- info_(this, "Rollback finished successfully.", True)
- raise Exception
# ------------------------------------------------------------------------------
-
def unescape(string):
"""
Unescape separation characters in connection strings, i.e., remove first
@@ -736,7 +726,6 @@ def unescape(string):
return re.sub(r'\\(?P<char>[/@:\\])', '\g<char>', string)
# ------------------------------------------------------------------------------
-
def parseConnectionStr(connectionStr):
"""
@brief Parse connection strings of the form
@@ -760,11 +749,10 @@ def parseConnectionStr(connectionStr):
unescape(match.group('database')))
# ------------------------------------------------------------------------------
-
def parse_arguments():
parser = argparse.ArgumentParser(
prog="madpack",
- description='MADlib package manager (' + str(rev) + ')',
+ description='MADlib package manager (' + str(new_madlib_ver) + ')',
argument_default=False,
formatter_class=argparse.RawTextHelpFormatter,
epilog="""Example:
@@ -824,6 +812,264 @@ def parse_arguments():
# Get the arguments
return parser.parse_args()
+def run_install_check(args, testcase):
+ schema = args['schema']
+ db_madlib_ver = args['db_madlib_ver']
+ # 1) Compare OS and DB versions. Continue if OS = DB.
+ if get_rev_num(db_madlib_ver) != get_rev_num(new_madlib_ver):
+ _print_vers(new_madlib_ver, db_madlib_ver, con_args, schema)
+ info_(this, "Versions do not match. Install-check stopped.", True)
+ return
+
+ # Create install-check user
+ test_user = ('madlib_' +
+ new_madlib_ver.replace('.', '').replace('-', '_') +
+ '_installcheck')
+ try:
+ _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), False)
+ except:
+ _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
+ _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), True)
+ _internal_run_query("CREATE USER %s;" % (test_user), True)
+
+ _internal_run_query("GRANT USAGE ON SCHEMA %s TO %s;" % (schema, test_user), True)
+
+ # 2) Run test SQLs
+ info_(this, "> Running test scripts for:", verbose)
+
+ caseset = (set([test.strip() for test in testcase.split(',')])
+ if testcase != "" else set())
+
+ modset = {}
+ for case in caseset:
+ if case.find('/') > -1:
+ [mod, algo] = case.split('/')
+ if mod not in modset:
+ modset[mod] = []
+ if algo not in modset[mod]:
+ modset[mod].append(algo)
+ else:
+ modset[case] = []
+
+ # Loop through all modules
+ for moduleinfo in portspecs['modules']:
+
+ # Get module name
+ module = moduleinfo['name']
+
+ # Skip if doesn't meet specified modules
+ if modset is not None and len(modset) > 0 and module not in modset:
+ continue
+ # JIRA: MADLIB-1078 fix
+ # Skip pmml during install-check (when run without the -t option).
+ # We can still run install-check on pmml with '-t' option.
+ if not modset and module in ['pmml']:
+ continue
+ info_(this, "> - %s" % module, verbose)
+
+ # Make a temp dir for this module (if doesn't exist)
+ cur_tmpdir = tmpdir + '/' + module + '/test' # tmpdir is a global variable
+ _make_dir(cur_tmpdir)
+
+ # Find the Python module dir (platform specific or generic)
+ if os.path.isdir(maddir + "/ports/" + portid + "/" + dbver + "/modules/" + module):
+ maddir_mod_py = maddir + "/ports/" + portid + "/" + dbver + "/modules"
+ else:
+ maddir_mod_py = maddir + "/modules"
+
+ # Find the SQL module dir (platform specific or generic)
+ if os.path.isdir(maddir + "/ports/" + portid + "/modules/" + module):
+ maddir_mod_sql = maddir + "/ports/" + portid + "/modules"
+ else:
+ maddir_mod_sql = maddir + "/modules"
+
+ # Prepare test schema
+ test_schema = "madlib_installcheck_%s" % (module)
+ _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" %
+ (test_schema, test_schema), True)
+ _internal_run_query("GRANT ALL ON SCHEMA %s TO %s;" %
+ (test_schema, test_user), True)
+
+ # Switch to test user and prepare the search_path
+ pre_sql = '-- Switch to test user:\n' \
+ 'SET ROLE %s;\n' \
+ '-- Set SEARCH_PATH for install-check:\n' \
+ 'SET search_path=%s,%s;\n' \
+ % (test_user, test_schema, schema)
+
+ # Loop through all test SQL files for this module
+ sql_files = maddir_mod_sql + '/' + module + '/test/*.sql_in'
+ for sqlfile in sorted(glob.glob(sql_files), reverse=True):
+ algoname = os.path.basename(sqlfile).split('.')[0]
+ # run only algo specified
+ if (module in modset and modset[module] and
+ algoname not in modset[module]):
+ continue
+
+ # Set file names
+ tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
+ logfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.log'
+
+ # If there is no problem with the SQL file
+ milliseconds = 0
+
+ # Run the SQL
+ run_start = datetime.datetime.now()
+ retval = _run_install_check_sql(schema, maddir_mod_py,
+ module, sqlfile, tmpfile,
+ logfile, pre_sql)
+ # Runtime evaluation
+ run_end = datetime.datetime.now()
+ milliseconds = round((run_end - run_start).seconds * 1000 +
+ (run_end - run_start).microseconds / 1000)
+
+ # Check the exit status
+ result = _parse_result_logfile(retval, logfile, tmpfile, sqlfile,
+ module, milliseconds)
+
+ # Cleanup test schema for the module
+ _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE;" % (test_schema), True)
+
+ # Drop install-check user
+ _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
+ _internal_run_query("DROP USER %s;" % (test_user), True)
+
+def _append_uninstall_madlib_sqlfile(schema, db_madlib_ver, is_schema_in_db,
+ output_filehandle):
+ if get_rev_num(db_madlib_ver) == [0]:
+ info_(this, "Nothing to uninstall. No version found in schema %s." % schema, True)
+ return 1, is_schema_in_db
+
+ # Find any potential data to lose
+ affected_objects = _internal_run_query("""
+ SELECT
+ n1.nspname AS schema,
+ relname AS relation,
+ attname AS column,
+ typname AS type
+ FROM
+ pg_attribute a,
+ pg_class c,
+ pg_type t,
+ pg_namespace n,
+ pg_namespace n1
+ WHERE
+ n.nspname = '%s'
+ AND t.typnamespace = n.oid
+ AND a.atttypid = t.oid
+ AND c.oid = a.attrelid
+ AND c.relnamespace = n1.oid
+ AND c.relkind = 'r'
+ ORDER BY
+ n1.nspname, relname, attname, typname""" % schema.lower(), True)
+
+ info_(this, "*** Uninstalling MADlib ***", True)
+ info_(this, "***********************************************************************************", True)
+ info_(this, "* Schema %s and all database objects depending on it will be dropped!" % schema, True)
+ if affected_objects:
+ info_(this, "* If you continue the following data will be lost (schema : table.column : type):", True)
+ for ao in affected_objects:
+ info_(this, '* - ' + ao['schema'] + ' : ' + ao['relation'] + '.' +
+ ao['column'] + ' : ' + ao['type'], True)
+ info_(this, "***********************************************************************************", True)
+ info_(this, "Would you like to continue? [Y/N]", True)
+ go = raw_input('>>> ').upper()
+ while (go not in ('Y', 'N', 'YES', 'NO')):
+ go = raw_input('Yes or No >>> ').upper()
+
+ # 2) Do the uninstall/drop
+ if go in ('N', 'NO'):
+ info_(this, 'No problem. Nothing dropped.', True)
+ return 1, is_schema_in_db
+ elif go in ('Y', 'YES'):
+ try:
+ _write_to_file(output_filehandle,
+ "DROP SCHEMA %s CASCADE;" % (schema))
+ is_schema_in_db = False
+ return 0, is_schema_in_db
+ except:
+ error_(this, "Cannot drop schema %s." % schema, True)
+
+ else:
+ return 1, is_schema_in_db
+
+def _append_install_madlib_sqlfile(schema, db_madlib_ver, is_schema_in_db,
+ madpack_cmd, testcase, output_filehandle):
+ # Refresh MADlib version in DB, None for GP/PG
+ if madpack_cmd == 'reinstall':
+ info_(this, "Setting MADlib database version to be None for reinstall", verbose)
+ db_madlib_ver = None
+
+ info_(this, "*** Installing MADlib ***", True)
+
+ # 1) Compare OS and DB versions.
+ # noop if OS <= DB.
+ _print_vers(new_madlib_ver, db_madlib_ver, con_args, schema)
+
+ if db_madlib_ver is None:
+ # Case when there is no existing MADlib installation, proceed to create
+ # objects if nothing installed in DB
+ pass
+ elif is_rev_gte(get_rev_num(db_madlib_ver), get_rev_num(new_madlib_ver)):
+ # Case when existing MADlib version is the same/higher as the new installation.
+ info_(this, "Current MADlib version already up to date.", True)
+ return 1
+ else:
+ # Case when the existing MADlib installation is lower than the new
+ # installation. Error out and refer to upgrade if OS > DB
+ error_(this, """Aborting installation: existing MADlib version detected in {0} schema
+ To upgrade the {0} schema to MADlib v{1} please run the following command:
+ madpack upgrade -s {0} -p {2} [-c ...]
+ """.format(schema, new_madlib_ver, portid), True)
+
+ # 2) Run installation
+ _plpy_check(py_min_ver)
+ _db_install(schema, is_schema_in_db, output_filehandle,
+ testcase)
+ return 0
+
+def create_install_madlib_sqlfile(args, madpack_cmd, testcase):
+ upgrade = args['upgrade']
+ schema = args['schema']
+ db_madlib_ver = args['db_madlib_ver']
+ is_schema_in_db = args['is_schema_in_db']
+ return_signal = 0
+ with open(args['output_filename'], 'a+') as output_filehandle:
+ # COMMAND: uninstall/reinstall
+ if madpack_cmd in ('uninstall', 'reinstall'):
+ return_signal, is_schema_in_db = _append_uninstall_madlib_sqlfile(
+ schema, db_madlib_ver, is_schema_in_db, output_filehandle)
+
+ # COMMAND: install/reinstall
+ if madpack_cmd in ('install', 'reinstall'):
+ return_signal += _append_install_madlib_sqlfile(schema, db_madlib_ver,
+ is_schema_in_db, madpack_cmd, testcase, output_filehandle)
+
+ # COMMAND: upgrade
+ if madpack_cmd in ('upgrade', 'update'):
+ upgrade = True
+ info_(this, "*** Upgrading MADlib ***", True)
+ db_madlib_ver = get_db_madlib_version(con_args, schema)
+
+ # 1) Check DB version. If None, nothing to upgrade.
+ if not db_madlib_ver:
+ info_(this, "MADlib is not installed in {schema} schema and there "
+ "is nothing to upgrade. Please use install "
+ "instead.".format(schema=schema),
+ True)
+ return_signal += 1
+
+ # 2) Compare OS and DB versions. Continue if OS > DB.
+ _print_vers(new_madlib_ver, db_madlib_ver, con_args, schema)
+ if is_rev_gte(get_rev_num(db_madlib_ver), get_rev_num(new_madlib_ver)):
+ info_(this, "Current MADlib version is already up-to-date.", True)
+ return_signal += 1
+
+ # 3) Run upgrade
+ _plpy_check(py_min_ver)
+ return_signal = _db_upgrade(schema, output_filehandle, db_madlib_ver)
+
+ return 1 if return_signal > 0 else 0
def main(argv):
args = parse_arguments()
@@ -907,7 +1153,7 @@ def main(argv):
maddir = _get_relative_maddir(maddir, portid)
# Get MADlib version in DB
- dbrev = get_madlib_dbrev(con_args, schema)
+ db_madlib_ver = get_db_madlib_version(con_args, schema)
portdir = os.path.join(maddir, "ports", portid)
supportedVersions = [dirItem for dirItem in os.listdir(portdir)
@@ -974,12 +1220,12 @@ def main(argv):
portspecs = configyml.get_modules(maddir_conf)
else:
con_args = None
- dbrev = None
+ db_madlib_ver = None
# Parse COMMAND argument and compare with Ports.yml
# Debugging...
- # print "OS rev: " + str(rev) + " > " + str(get_rev_num(rev))
- # print "DB rev: " + str(dbrev) + " > " + str(get_rev_num(dbrev))
+ # print "OS new_madlib_ver: " + str(new_madlib_ver) + " > " + str(get_rev_num(new_madlib_ver))
+ # print "DB new_madlib_ver: " + str(db_madlib_ver) + " > " + str(get_rev_num(db_madlib_ver))
# Make sure we have the necessary parameters to continue
if args.command[0] != 'version':
@@ -987,275 +1233,45 @@ def main(argv):
error_(this, "Missing -p/--platform parameter.", True)
if not con_args:
error_(this, "Unknown problem with database connection string: %s" % con_args, True)
+ # ---------------- Completed "Get and validate arguments" -----------------
# COMMAND: version
if args.command[0] == 'version':
- _print_revs(rev, dbrev, con_args, schema)
-
- # COMMAND: uninstall/reinstall
- if args.command[0] in ('uninstall', 'reinstall'):
- if get_rev_num(dbrev) == [0]:
- info_(this, "Nothing to uninstall. No version found in schema %s." % schema.upper(), True)
- return
-
- # Find any potential data to lose
- affected_objects = _internal_run_query("""
- SELECT
- n1.nspname AS schema,
- relname AS relation,
- attname AS column,
- typname AS type
- FROM
- pg_attribute a,
- pg_class c,
- pg_type t,
- pg_namespace n,
- pg_namespace n1
- WHERE
- n.nspname = '%s'
- AND t.typnamespace = n.oid
- AND a.atttypid = t.oid
- AND c.oid = a.attrelid
- AND c.relnamespace = n1.oid
- AND c.relkind = 'r'
- ORDER BY
- n1.nspname, relname, attname, typname""" % schema.lower(), True)
-
- info_(this, "*** Uninstalling MADlib ***", True)
- info_(this, "***********************************************************************************", True)
- info_(this, "* Schema %s and all database objects depending on it will be dropped!" % schema.upper(), True)
- if affected_objects:
- info_(this, "* If you continue the following data will be lost (schema : table.column : type):", True)
- for ao in affected_objects:
- info_(this, '* - ' + ao['schema'] + ' : ' + ao['relation'] + '.' +
- ao['column'] + ' : ' + ao['type'], True)
- info_(this, "***********************************************************************************", True)
- info_(this, "Would you like to continue? [Y/N]", True)
- go = raw_input('>>> ').upper()
- while go != 'Y' and go != 'N':
- go = raw_input('Yes or No >>> ').upper()
-
- # 2) Do the uninstall/drop
- if go == 'N':
- info_(this, 'No problem. Nothing dropped.', True)
- return
-
- elif go == 'Y':
- info_(this, "> dropping schema %s" % schema.upper(), verbose)
- try:
- _internal_run_query("DROP SCHEMA %s CASCADE;" % (schema), True)
- except:
- error_(this, "Cannot drop schema %s." % schema.upper(), True)
-
- info_(this, 'Schema %s (and all dependent objects) has been dropped.' % schema.upper(), True)
- info_(this, 'MADlib uninstalled successfully.', True)
-
- else:
- return
-
- # COMMAND: install/reinstall
- if args.command[0] in ('install', 'reinstall'):
- # Refresh MADlib version in DB, None for GP/PG
- if args.command[0] == 'reinstall':
- print "Setting MADlib database version to be None for reinstall"
- dbrev = None
-
- info_(this, "*** Installing MADlib ***", True)
-
- # 1) Compare OS and DB versions.
- # noop if OS <= DB.
- _print_revs(rev, dbrev, con_args, schema)
- if is_rev_gte(get_rev_num(dbrev), get_rev_num(rev)):
- info_(this, "Current MADlib version already up to date.", True)
- return
- # proceed to create objects if nothing installed in DB
- elif dbrev is None:
- pass
- # error and refer to upgrade if OS > DB
- else:
- error_(this, """Aborting installation: existing MADlib version detected in {0} schema
- To upgrade the {0} schema to MADlib v{1} please run the following command:
- madpack upgrade -s {0} -p {2} [-c ...]
- """.format(schema, rev, portid), True)
-
- # 2) Run installation
- try:
- _plpy_check(py_min_ver)
- _db_install(schema, dbrev, args.testcase)
- except:
- error_(this, "MADlib installation failed.", True)
-
- # COMMAND: upgrade
- if args.command[0] in ('upgrade', 'update'):
- info_(this, "*** Upgrading MADlib ***", True)
- dbrev = get_madlib_dbrev(con_args, schema)
-
- # 1) Check DB version. If None, nothing to upgrade.
- if not dbrev:
- info_(this, "MADlib is not installed in {schema} schema and there "
- "is nothing to upgrade. Please use install "
- "instead.".format(schema=schema.upper()),
- True)
- return
-
- # 2) Compare OS and DB versions. Continue if OS > DB.
- _print_revs(rev, dbrev, con_args, schema)
- if is_rev_gte(get_rev_num(dbrev), get_rev_num(rev)):
- info_(this, "Current MADlib version is already up-to-date.", True)
- return
-
- if float('.'.join(dbrev.split('.')[0:2])) < 1.0:
- info_(this, "The version gap is too large, upgrade is supported only for "
- "packages greater than or equal to v1.0.", True)
- return
-
- # 3) Run upgrade
- try:
- _plpy_check(py_min_ver)
- _db_upgrade(schema, dbrev)
- except Exception as e:
- # Uncomment the following lines when debugging
- print "Exception: " + str(e)
- print sys.exc_info()
- traceback.print_tb(sys.exc_info()[2])
- error_(this, "MADlib upgrade failed.", True)
+ _print_vers(new_madlib_ver, db_madlib_ver, con_args, schema)
# COMMAND: install-check
if args.command[0] == 'install-check':
-
- # 1) Compare OS and DB versions. Continue if OS = DB.
- if get_rev_num(dbrev) != get_rev_num(rev):
- _print_revs(rev, dbrev, con_args, schema)
- info_(this, "Versions do not match. Install-check stopped.", True)
- return
-
- # Create install-check user
- test_user = ('madlib_' +
- rev.replace('.', '').replace('-', '_') +
- '_installcheck')
+ run_install_check(locals(), args.testcase)
+ else:
try:
- _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), False)
+ is_schema_in_db = _internal_run_query("SELECT schema_name FROM information_schema.schemata WHERE schema_name='%s';" % schema, True)
except:
- _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
- _internal_run_query("DROP USER IF EXISTS %s;" % (test_user), True)
- _internal_run_query("CREATE USER %s;" % (test_user), True)
-
- _internal_run_query("GRANT USAGE ON SCHEMA %s TO %s;" % (schema, test_user), True)
-
- # 2) Run test SQLs
- info_(this, "> Running test scripts for:", verbose)
-
- caseset = (set([test.strip() for test in args.testcase.split(',')])
- if args.testcase != "" else set())
-
- modset = {}
- for case in caseset:
- if case.find('/') > -1:
- [mod, algo] = case.split('/')
- if mod not in modset:
- modset[mod] = []
- if algo not in modset[mod]:
- modset[mod].append(algo)
- else:
- modset[case] = []
-
- # Loop through all modules
- for moduleinfo in portspecs['modules']:
-
- # Get module name
- module = moduleinfo['name']
-
- # Skip if doesn't meet specified modules
- if modset is not None and len(modset) > 0 and module not in modset:
- continue
- # JIRA: MADLIB-1078 fix
- # Skip pmml during install-check (when run without the -t option).
- # We can still run install-check on pmml with '-t' option.
- if not modset and module in ['pmml']:
- continue
- info_(this, "> - %s" % module, verbose)
-
- # Make a temp dir for this module (if doesn't exist)
- cur_tmpdir = tmpdir + '/' + module + '/test' # tmpdir is a global variable
- _make_dir(cur_tmpdir)
-
- # Find the Python module dir (platform specific or generic)
- if os.path.isdir(maddir + "/ports/" + portid + "/" + dbver + "/modules/" + module):
- maddir_mod_py = maddir + "/ports/" + portid + "/" + dbver + "/modules"
- else:
- maddir_mod_py = maddir + "/modules"
-
- # Find the SQL module dir (platform specific or generic)
- if os.path.isdir(maddir + "/ports/" + portid + "/modules/" + module):
- maddir_mod_sql = maddir + "/ports/" + portid + "/modules"
+ error_(this, "Cannot validate if schema already exists.", True)
+
+ output_filename = tmpdir + "/madlib_{0}.sql".format(args.command[0])
+ upgrade = False
+ return_val = create_install_madlib_sqlfile(locals(), args.command[0], args.testcase)
+ if return_val == 0:
+ op_msg = args.command[0].capitalize()+"ing" if args.command[0] != 'upgrade' \
+ else 'Upgrading'
+ info_(this, "%s MADlib:" % op_msg, True)
+ _cleanup_comments_in_sqlfile(output_filename, upgrade)
+ result = _run_sql_file(schema, output_filename)
+
+ if result == 'FAIL':
+ info_(this, "MADlib {0} unsuccessful.".format(args.command[0]), True)
+ info_(this, "All changes are rolled back.", True)
else:
- maddir_mod_sql = maddir + "/modules"
-
- # Prepare test schema
- test_schema = "madlib_installcheck_%s" % (module)
- _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE; CREATE SCHEMA %s;" %
- (test_schema, test_schema), True)
- _internal_run_query("GRANT ALL ON SCHEMA %s TO %s;" %
- (test_schema, test_user), True)
-
- # Switch to test user and prepare the search_path
- pre_sql = '-- Switch to test user:\n' \
- 'SET ROLE %s;\n' \
- '-- Set SEARCH_PATH for install-check:\n' \
- 'SET search_path=%s,%s;\n' \
- % (test_user, test_schema, schema)
-
- # Loop through all test SQL files for this module
- sql_files = maddir_mod_sql + '/' + module + '/test/*.sql_in'
- for sqlfile in sorted(glob.glob(sql_files), reverse=True):
- algoname = os.path.basename(sqlfile).split('.')[0]
- # run only algo specified
- if (module in modset and modset[module] and
- algoname not in modset[module]):
- continue
-
- # Set file names
- tmpfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.tmp'
- logfile = cur_tmpdir + '/' + os.path.basename(sqlfile) + '.log'
-
- # If there is no problem with the SQL file
- milliseconds = 0
-
- # Run the SQL
- run_start = datetime.datetime.now()
- retval = _run_sql_file(schema, maddir_mod_py, module,
- sqlfile, tmpfile, logfile, pre_sql)
- # Runtime evaluation
- run_end = datetime.datetime.now()
- milliseconds = round((run_end - run_start).seconds * 1000 +
- (run_end - run_start).microseconds / 1000)
-
- # Check the exit status
- if retval != 0:
- result = 'FAIL'
- keeplogs = True
- # Since every single statement in the test file gets logged,
- # an empty log file indicates an empty or a failed test
- elif os.path.isfile(logfile) and os.path.getsize(logfile) > 0:
- result = 'PASS'
- # Otherwise
- else:
- result = 'ERROR'
-
- # Output result
- print "TEST CASE RESULT|Module: " + module + \
- "|" + os.path.basename(sqlfile) + "|" + result + \
- "|Time: %d milliseconds" % (milliseconds)
-
- if result == 'FAIL':
- error_(this, "Failed executing %s" % tmpfile, False)
- error_(this, "Check the log at %s" % logfile, False)
- # Cleanup test schema for the module
- _internal_run_query("DROP SCHEMA IF EXISTS %s CASCADE;" % (test_schema), True)
-
- # Drop install-check user
- _internal_run_query("DROP OWNED BY %s CASCADE;" % (test_user), True)
- _internal_run_query("DROP USER %s;" % (test_user), True)
+ if args.command[0] != 'uninstall':
+ if args.command[0] == 'upgrade':
+ info_(this, "MADlib %s upgraded successfully in %s schema." % (str(new_madlib_ver), schema), True)
+ else:
+ info_(this, "> Created %s schema" % schema, True)
+ info_(this, "> Created %s.MigrationHistory table" % schema, True)
+ info_(this, "> Wrote version info in MigrationHistory table", True)
+ info_(this, "MADlib %s installed successfully in %s schema." % (str(new_madlib_ver), schema))
+ else :
+ info_(this, "MADlib %s uninstalled successfully from %s schema." % (str(new_madlib_ver), schema))
http://git-wip-us.apache.org/repos/asf/madlib/blob/8e34f68d/src/madpack/upgrade_util.py
----------------------------------------------------------------------
diff --git a/src/madpack/upgrade_util.py b/src/madpack/upgrade_util.py
index 359c3ec..c77b893 100644
--- a/src/madpack/upgrade_util.py
+++ b/src/madpack/upgrade_util.py
@@ -4,10 +4,12 @@ import os
import re
import yaml
+from utilities import _write_to_file
from utilities import is_rev_gte
+from utilities import get_dbver
from utilities import get_rev_num
+from utilities import remove_comments_from_sql
from utilities import run_query
-from utilities import get_dbver
if not __name__ == "__main__":
def run_sql(sql, portid, con_args):
@@ -123,7 +125,7 @@ class ChangeHandler(UpgradeBase):
"""
def __init__(self, schema, portid, con_args, maddir, mad_dbrev,
- upgrade_to=None):
+ output_filehandle, upgrade_to=None):
UpgradeBase.__init__(self, schema, portid, con_args)
# FIXME: maddir includes the '/src' folder. It's supposed to be the
@@ -132,7 +134,7 @@ class ChangeHandler(UpgradeBase):
self._mad_dbrev = mad_dbrev
self._newmodule = {}
self._curr_rev = self._get_current_version() if not upgrade_to else upgrade_to
-
+ self.output_filehandle = output_filehandle
self._udt = {}
self._udf = {}
self._uda = {}
@@ -404,8 +406,8 @@ class ChangeHandler(UpgradeBase):
cascade_str = 'CASCADE' if udt in ('svec', 'bytea8') else ''
# CASCADE DROP for svec and bytea8 because the recv/send
# functions and the type depend on each other
- self._run_sql("DROP TYPE IF EXISTS {0}.{1} {2}".
- format(self._schema, udt, cascade_str))
+ _write_to_file("DROP TYPE IF EXISTS {0}.{1} {2};".
+ format(self._schema, udt, cascade_str))
def drop_changed_udf(self):
"""
@@ -418,10 +420,10 @@ class ChangeHandler(UpgradeBase):
# so dropping that function needs this extra check.
udf_arglist = item['argument'] if 'argument' in item else ''
- self._run_sql("DROP FUNCTION IF EXISTS {schema}.{udf}({arg})".
- format(schema=self._schema,
- udf=udf,
- arg=udf_arglist))
+ _write_to_file("DROP FUNCTION IF EXISTS {schema}.{udf}({arg});".
+ format(schema=self._schema,
+ udf=udf,
+ arg=udf_arglist))
def drop_changed_uda(self):
"""
@@ -429,10 +431,10 @@ class ChangeHandler(UpgradeBase):
"""
for uda in self._uda:
for item in self._uda[uda]:
- self._run_sql("DROP AGGREGATE IF EXISTS {schema}.{uda}({arg})".
- format(schema=self._schema,
- uda=uda,
- arg=item['argument']))
+ _write_to_file("DROP AGGREGATE IF EXISTS {schema}.{uda}({arg});".
+ format(schema=self._schema,
+ uda=uda,
+ arg=item['argument']))
def drop_changed_udc(self):
"""
@@ -440,17 +442,17 @@ class ChangeHandler(UpgradeBase):
@note We have special treatment for UDCs defined in the svec module
"""
for udc in self._udc:
- self._run_sql("DROP CAST IF EXISTS ({sourcetype} AS {targettype})".
- format(sourcetype=self._udc[udc]['sourcetype'],
- targettype=self._udc[udc]['targettype']))
+ _write_to_file("DROP CAST IF EXISTS ({sourcetype} AS {targettype});".
+ format(sourcetype=self._udc[udc]['sourcetype'],
+ targettype=self._udc[udc]['targettype']))
def drop_traininginfo_4dt(self):
"""
@brief Drop the madlib.training_info table, which should no longer be used since
the version 1.5
"""
- self._run_sql("DROP TABLE IF EXISTS {schema}.training_info".format(
- schema=self._schema))
+ _write_to_file("DROP TABLE IF EXISTS {schema}.training_info;".
+ format(schema=self._schema))
def drop_changed_udo(self):
"""
@@ -460,8 +462,8 @@ class ChangeHandler(UpgradeBase):
for value in self._udo[op]:
leftarg = value['leftarg'].replace('schema_madlib', self._schema)
rightarg = value['rightarg'].replace('schema_madlib', self._schema)
- self._run_sql("""
- DROP OPERATOR IF EXISTS {schema}.{op} ({leftarg}, {rightarg})
+ _write_to_file("""
+ DROP OPERATOR IF EXISTS {schema}.{op} ({leftarg}, {rightarg});
""".format(schema=self._schema, **locals()))
def drop_changed_udoc(self):
@@ -471,8 +473,8 @@ class ChangeHandler(UpgradeBase):
for op_cls in self._udoc:
for value in self._udoc[op_cls]:
index = value['index']
- self._run_sql("""
- DROP OPERATOR CLASS IF EXISTS {schema}.{op_cls} USING {index}
+ _write_to_file("""
+ DROP OPERATOR CLASS IF EXISTS {schema}.{op_cls} USING {index};
""".format(schema=self._schema, **locals()))
@@ -1077,7 +1079,7 @@ class ScriptCleaner(UpgradeBase):
p_str = "CREATE\s+OPERATOR\s+{schema}\.{op_name}\s*\(" \
"\s*leftarg\s*=\s*{leftarg}\s*," \
"\s*rightarg\s*=\s*{rightarg}\s*," \
- ".*?\)\s*;".format(schema=self._schema.upper(),
+ ".*?\)\s*;".format(schema=self._schema,
op_name=re.escape(each_udo), **locals())
operator_patterns.append(p_str)
return operator_patterns
@@ -1102,7 +1104,7 @@ class ScriptCleaner(UpgradeBase):
index = each_item['index']
p_str = "CREATE\s+OPERATOR\s+CLASS\s+{schema}\.{opc_name}" \
".*?USING\s+{index}" \
- ".*?;".format(schema=self._schema.upper(),
+ ".*?;".format(schema=self._schema,
opc_name=each_udoc, **locals())
opclass_patterns.append(p_str)
return opclass_patterns
@@ -1132,7 +1134,7 @@ class ScriptCleaner(UpgradeBase):
else:
p_arg_str += ',\s*%s\s*' % arg
p_str = "CREATE\s+(ORDERED\s)*\s*AGGREGATE" \
- "\s+%s\.(%s)\s*\(\s*%s\)(.*?);" % (self._schema.upper(),
+ "\s+%s\.(%s)\s*\(\s*%s\)(.*?);" % (self._schema,
each_uda,
p_arg_str)
aggregate_patterns.append(p_str)
@@ -1165,22 +1167,12 @@ class ScriptCleaner(UpgradeBase):
"""
@brief Remove comments in the sql script
"""
- pattern = re.compile(r"""(/\*(.|[\r\n])*?\*/)|(--(.*|[\r\n]))""")
- res = ''
- lines = re.split(r'[\r\n]+', self._sql)
- for line in lines:
- tmp = line
- if not tmp.strip().startswith("E'"):
- line = re.sub(pattern, '', line)
- res += line + '\n'
- self._sql = res.strip()
- self._sql = re.sub(pattern, '', self._sql).strip()
-
- """
- @breif Remove "drop/create type" statements in the sql script
- """
+ self._sql = remove_comments_from_sql(self._sql)
def _clean_type(self):
+ """
+ @breif Remove "drop/create type" statements in the sql script
+ """
# remove 'drop type'
pattern = re.compile('DROP(\s+)TYPE(.*?);', re.DOTALL | re.IGNORECASE)
self._sql = re.sub(pattern, '', self._sql)
@@ -1194,7 +1186,7 @@ class ScriptCleaner(UpgradeBase):
udt_str += udt
else:
udt_str += '|' + udt
- p_str = 'CREATE(\s+)TYPE(\s+)%s\.(%s)(.*?);' % (self._schema.upper(), udt_str)
+ p_str = 'CREATE(\s+)TYPE(\s+)%s\.(%s)(.*?);' % (self._schema, udt_str)
pattern = re.compile(p_str, re.DOTALL | re.IGNORECASE)
self._sql = re.sub(pattern, '', self._sql)
@@ -1299,18 +1291,22 @@ class ScriptCleaner(UpgradeBase):
pattern = re.compile(r"""CREATE(\s+)FUNCTION""", re.DOTALL | re.IGNORECASE)
self._sql = re.sub(pattern, 'CREATE OR REPLACE FUNCTION', self._sql)
- def cleanup(self, sql):
+ def cleanup(self, sql, algoname):
"""
@brief Entry function for cleaning the sql script
"""
self._sql = sql
- self._clean_comment()
- self._clean_type()
- self._clean_cast()
- self._clean_operator()
- self._clean_opclass()
- self._clean_aggregate()
- self._clean_function()
+ # Modify the original sql during upgrade. Clean only non-new modules as
+ # they already exist prior to the upgrade. Mostly, all drops are removed
+ # and replaced by creates.
+ if algoname not in self.get_change_handler().newmodule:
+ self._clean_comment()
+ self._clean_type()
+ self._clean_cast()
+ self._clean_operator()
+ self._clean_opclass()
+ self._clean_aggregate()
+ self._clean_function()
return self._sql
http://git-wip-us.apache.org/repos/asf/madlib/blob/8e34f68d/src/madpack/utilities.py
----------------------------------------------------------------------
diff --git a/src/madpack/utilities.py b/src/madpack/utilities.py
index 4a23b1b..94fc1d5 100644
--- a/src/madpack/utilities.py
+++ b/src/madpack/utilities.py
@@ -32,6 +32,13 @@ import unittest
# Some read-only variables
this = os.path.basename(sys.argv[0]) # name of this script
+# ------------------------------------------------------------------------------
+
+def _write_to_file(handle, sql):
+ handle.write(sql)
+ handle.write('\n')
+
+# ------------------------------------------------------------------------------
def error_(src_name, msg, stop=False):
"""
@@ -44,8 +51,8 @@ def error_(src_name, msg, stop=False):
# stack trace is not printed
if stop:
exit(2)
-# ------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
def info_(src_name, msg, verbose=True):
"""
@@ -57,6 +64,22 @@ def info_(src_name, msg, verbose=True):
print("{0}: INFO : {1}".format(src_name, msg))
# ------------------------------------------------------------------------------
+def remove_comments_from_sql(sql):
+ """
+ @brief Remove comments in the sql script
+ """
+ pattern = re.compile(r"""(/\*(.|[\r\n])*?\*/)|(--(.*|[\r\n]))""")
+ res = ''
+ lines = re.split(r'[\r\n]+', sql)
+ for line in lines:
+ tmp = line
+ if not tmp.strip().startswith("E'"):
+ line = re.sub(pattern, '', line)
+ res += line + '\n'
+ return_sql = res.strip()
+ return_sql = re.sub(pattern, '', return_sql).strip()
+ return return_sql
+# ------------------------------------------------------------------------------
def run_query(sql, con_args, show_error=True):
# Define sqlcmd
@@ -119,7 +142,7 @@ def run_query(sql, con_args, show_error=True):
# ------------------------------------------------------------------------------
-def get_madlib_dbrev(con_args, schema):
+def get_db_madlib_version(con_args, schema):
"""
Read MADlib version from database
@param con_args database conection object