You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/06/16 20:57:52 UTC
[15/34] incubator-madlib git commit: Mulltiple: Add grouping support for SSSP and support GPDB5

Mulltiple: Add grouping support for SSSP and support GPDB5

JIRA: MADLIB-1081

- This commit adds grouping support for SSSP as well as its path function.
- Update chi2 test for GPDB5 alpha compatibility.
- Decouple DROP and CREATE statements for various modules.

Closes #113


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/8faf6226
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/8faf6226
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/8faf6226

Branch: refs/heads/latest_release
Commit: 8faf62263f6c5aa4281e2d3dc33e389d41784c0e
Parents: c82b9d0
Author: Orhan Kislal <ok...@pivotal.io>
Authored: Mon Apr 17 11:17:41 2017 -0700
Committer: Orhan Kislal <ok...@pivotal.io>
Committed: Mon Apr 17 11:17:41 2017 -0700

----------------------------------------------------------------------
 .../elastic_net_generate_result.py_in           |   2 +-
 .../postgres/modules/graph/graph_utils.py_in    |   2 +-
 src/ports/postgres/modules/graph/sssp.py_in     | 716 ++++++++++++++-----
 src/ports/postgres/modules/graph/sssp.sql_in    | 132 +++-
 .../postgres/modules/graph/test/sssp.sql_in     |  75 +-
 src/ports/postgres/modules/pca/pca.py_in        |   6 +-
 .../modules/stats/test/chi2_test.sql_in         |   2 +-
 .../validation/internal/cross_validation.py_in  |   6 +-
 8 files changed, 739 insertions(+), 202 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/elastic_net/elastic_net_generate_result.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net_generate_result.py_in b/src/ports/postgres/modules/elastic_net/elastic_net_generate_result.py_in
index c48beca..6246ed9 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net_generate_result.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net_generate_result.py_in
@@ -81,8 +81,8 @@ def _elastic_net_generate_result(optimizer, iteration_run, **args):
                    schema_madlib=args["schema_madlib"])
 
     # Create the output table
+    plpy.execute("DROP TABLE IF EXISTS {tbl_result}".format(**args))
     plpy.execute("""
-             DROP TABLE IF EXISTS {tbl_result};
              CREATE TABLE {tbl_result} (
                  {select_grouping_info}
                  family            text,

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/graph/graph_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in
index 2d83301..25f70a5 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -72,7 +72,7 @@ def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
 		"""Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """.
 		format(**locals()))
 	_assert(columns_exist_in_table(edge_table, edge_params.values()),
-		"""Graph {func_name}: Not all columns from {cols} present in edge table ({edge_table})""".
+		"""Graph {func_name}: Not all columns from {cols} are present in edge table ({edge_table})""".
 		format(cols=edge_params.values(), **locals()))
 
 	return None

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/graph/sssp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in
index 4d27761..2520830 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in
@@ -33,27 +33,56 @@ from utilities.control import MinWarning
 from utilities.utilities import _assert
 from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import unique_string
-from utilities.validate_args import get_cols
-from utilities.validate_args import unquote_ident
+from utilities.utilities import _string_to_array
+from utilities.utilities import split_quoted_delimited_str
 from utilities.validate_args import table_exists
 from utilities.validate_args import columns_exist_in_table
 from utilities.validate_args import table_is_empty
+from utilities.validate_args import get_cols_and_types
+from utilities.validate_args import get_expr_type
 
 m4_changequote(`<!', `!>')
 
+
+def _check_groups(tbl1, tbl2, grp_list):
+
+	"""
+	Helper function for joining tables with groups.
+	Args:
+		@param tbl1       Name of the first table
+		@param tbl2       Name of the second table
+		@param grp_list   The list of grouping columns
+	"""
+
+	return ' AND '.join([" {tbl1}.{i} = {tbl2}.{i} ".format(**locals())
+		for i in grp_list])
+
+def _grp_from_table(tbl, grp_list):
+
+	"""
+	Helper function for selecting grouping columns of a table
+	Args:
+		@param tbl        Name of the table
+		@param grp_list   The list of grouping columns
+	"""
+	return ' , '.join([" {tbl}.{i} ".format(**locals())
+		for i in grp_list])
+
 def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
-		edge_args, source_vertex, out_table, **kwargs):
+		edge_args, source_vertex, out_table, grouping_cols, **kwargs):
+
 	"""
     Single source shortest path function for graphs using the Bellman-Ford
     algorhtm [1].
     Args:
-        @param vertex_table     Name of the table that contains the vertex data.
-        @param vertex_id        Name of the column containing the vertex ids.
-        @param edge_table       Name of the table that contains the edge data.
-        @param edge_args        A comma-delimited string containing multiple
-        						named arguments of the form "name=value".
-        @param source_vertex    The source vertex id for the algorithm to start.
-        @param out_table   	    Name of the table to store the result of SSSP.
+        @param vertex_table    Name of the table that contains the vertex data.
+        @param vertex_id       Name of the column containing the vertex ids.
+        @param edge_table      Name of the table that contains the edge data.
+        @param edge_args       A comma-delimited string containing multiple
+                               named arguments of the form "name=value".
+        @param source_vertex   The source vertex id for the algorithm to start.
+        @param out_table       Name of the table to store the result of SSSP.
+        @param grouping_cols   The list of grouping columns.
 
     [1] https://en.wikipedia.org/wiki/Bellman-Ford_algorithm
     """
@@ -61,6 +90,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
 	with MinWarning("warning"):
 
 		INT_MAX = 2147483647
+		INFINITY = "'Infinity'"
 		EPSILON = 0.000001
 
 		message = unique_string(desp='message')
@@ -73,8 +103,23 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
 		edge_params = extract_keyvalue_params(edge_args,
                                             params_types,
                                             default_args)
+
+		# Prepare the input for recording in the summary table
 		if vertex_id is None:
+			v_st= "NULL"
 			vertex_id = "id"
+		else:
+			v_st = vertex_id
+		if edge_args is None:
+			e_st = "NULL"
+		else:
+			e_st = edge_args
+		if grouping_cols is None:
+			g_st = "NULL"
+			glist = None
+		else:
+			g_st = grouping_cols
+			glist = split_quoted_delimited_str(grouping_cols)
 
 		src = edge_params["src"]
 		dest = edge_params["dest"]
@@ -85,47 +130,91 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
 		local_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
 			<!"DISTRIBUTED BY (id)"!>)
 
-		validate_sssp(vertex_table, vertex_id, edge_table,
-			edge_params, source_vertex, out_table)
+		is_hawq = m4_ifdef(<!__HAWQ__!>, <!True!>, <!False!>)
+		_validate_sssp(vertex_table, vertex_id, edge_table,
+			edge_params, source_vertex, out_table, glist)
 
 		plpy.execute(" DROP TABLE IF EXISTS {0},{1},{2}".format(
 			message,oldupdate,newupdate))
 
+		# Initialize grouping related variables
+		comma_grp = ""
+		comma_grp_e = ""
+		comma_grp_m = ""
+		grp_comma = ""
+		checkg_oo = ""
+		checkg_eo = ""
+		checkg_ex = ""
+		checkg_om = ""
+		group_by = ""
+
+		if grouping_cols is not None:
+			comma_grp = " , " + grouping_cols
+			group_by = " , " + _grp_from_table(edge_table,glist)
+			comma_grp_e = " , " + _grp_from_table(edge_table,glist)
+			comma_grp_m = " , " + _grp_from_table("message",glist)
+			grp_comma = grouping_cols + " , "
+
+			checkg_oo_sub = _check_groups(out_table,"oldupdate",glist)
+			checkg_oo = " AND " + checkg_oo_sub
+			checkg_eo = " AND " + _check_groups(edge_table,"oldupdate",glist)
+			checkg_ex = " AND " + _check_groups(edge_table,"x",glist)
+			checkg_om = " AND " + _check_groups("out_table","message",glist)
+
+		w_type = get_expr_type(weight,edge_table).lower()
+		init_w = INT_MAX
+		if w_type in ['double precision','float8']:
+			init_w = INFINITY
+
 		# We keep a table of every vertex, the minimum cost to that destination
 		# seen so far and the parent to this vertex in the associated shortest
-		# path. This table will be updated throughtout the execution.
+		# path. This table will be updated throughout the execution.
 		plpy.execute(
-			""" CREATE TABLE {out_table} AS
-				SELECT {vertex_id} AS {vertex_id},
-					CAST('Infinity' AS DOUBLE PRECISION) AS {weight},
-					NULL::INT AS parent
-				FROM {vertex_table}
-				WHERE {vertex_id} IS NOT NULL
+			""" CREATE TABLE {out_table} AS ( SELECT
+					{grp_comma} {src} AS {vertex_id}, {weight},
+					{src} AS parent FROM {edge_table} LIMIT 0)
 				{distribution} """.format(**locals()))
 
+		# We keep a summary table to keep track of the parameters used for this
+		# SSSP run. This table is used in the path finding function to eliminate
+		# the need for repetition.
+		plpy.execute( """ CREATE TABLE {out_table}_summary  (
+			vertex_table            TEXT,
+			vertex_id               TEXT,
+			edge_table              TEXT,
+			edge_args               TEXT,
+			source_vertex           INTEGER,
+			out_table               TEXT,
+			grouping_cols           TEXT)
+			""".format(**locals()))
+		plpy.execute( """ INSERT INTO {out_table}_summary VALUES
+			('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
+			{source_vertex}, '{out_table}', '{g_st}')
+			""".format(**locals()))
+
 		# We keep 2 update tables and alternate them during the execution.
 		# This is necessary since we need to know which vertices are updated in
 		# the previous iteration to calculate the next set of updates.
 		plpy.execute(
-			""" CREATE TEMP TABLE {oldupdate}(
-				id INT, val DOUBLE PRECISION, parent INT)
+			""" CREATE TEMP TABLE {oldupdate} AS ( SELECT
+					{src} AS id, {weight},
+					{src} AS parent {comma_grp} FROM {edge_table} LIMIT 0)
 				{local_distribution}
 				""".format(**locals()))
 		plpy.execute(
-			""" CREATE TEMP TABLE {newupdate}(
-				id INT, val DOUBLE PRECISION, parent INT)
+			""" CREATE TEMP TABLE {newupdate} AS ( SELECT
+					{src} AS id, {weight},
+					{src} AS parent {comma_grp} FROM {edge_table} LIMIT 0)
 				{local_distribution}
 				""".format(**locals()))
 
 		# Since HAWQ does not allow us to update, we create a new table and
-		# rename at every iteration
-		temp_table = unique_string(desp='temp')
-		sql = m4_ifdef(<!__HAWQ__!>,
-			""" CREATE TABLE {temp_table} (
-					{vertex_id} INT, {weight} DOUBLE PRECISION, parent INT)
-					{distribution};
-			""",  <!''!>)
-		plpy.execute(sql.format(**locals()))
+		# rename at every iteration.
+		if is_hawq:
+			temp_table = unique_string(desp='temp')
+			sql =""" CREATE TABLE {temp_table} AS ( SELECT * FROM {out_table} )
+				{distribution} """
+			plpy.execute(sql.format(**locals()))
 
 		# GPDB and HAWQ have distributed by clauses to help them with indexing.
 		# For Postgres we add the indices manually.
@@ -137,45 +226,117 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
 			<!''!>)
 		plpy.execute(sql_index)
 
-		# The source can be reached with 0 cost and it has itself as the parent.
-		plpy.execute(
-			""" INSERT INTO {oldupdate}
-				VALUES({source_vertex},0,{source_vertex})
-			""".format(**locals()))
+		# The initialization step is quite different when grouping is involved
+		# since not every group (subgraph) will have the same set of vertices.
+
+		# Example:
+		# Assume there are two grouping columns g1 and g2
+		# g1 values are 0 and 1. g2 values are 5 and 6
+		if grouping_cols is not None:
+
+			distinct_grp_table = unique_string(desp='grp')
+			plpy.execute(""" DROP TABLE IF EXISTS {distinct_grp_table} """.
+				format(**locals()))
+			plpy.execute( """ CREATE TEMP TABLE {distinct_grp_table} AS
+				SELECT DISTINCT {grouping_cols} FROM {edge_table} """.
+				format(**locals()))
+			subq = unique_string(desp='subquery')
+
+			checkg_ds_sub = _check_groups(distinct_grp_table,subq,glist)
+			grp_d_comma = _grp_from_table(distinct_grp_table,glist) +","
+
+			plpy.execute(
+				""" INSERT INTO {out_table}
+				SELECT {grp_d_comma} {vertex_id} AS {vertex_id},
+					{init_w} AS {weight}, NULL::INT AS parent
+				FROM {distinct_grp_table} INNER JOIN
+					(
+					SELECT {src} AS {vertex_id} {comma_grp}
+					FROM {edge_table}
+					UNION
+					SELECT {dest} AS {vertex_id} {comma_grp}
+					FROM {edge_table}
+					) {subq} ON ({checkg_ds_sub})
+				WHERE {vertex_id} IS NOT NULL
+				""".format(**locals()))
+
+			plpy.execute(
+				""" INSERT INTO {oldupdate}
+					SELECT {source_vertex}, 0, {source_vertex},
+					{grouping_cols}
+					FROM {distinct_grp_table}
+				""".format(**locals()))
+
+			# The maximum number of vertices for any group.
+			# Used for determining negative cycles.
+			v_cnt = plpy.execute(
+				""" SELECT max(count) as max FROM (
+						SELECT count({vertex_id}) AS count
+						FROM {out_table}
+						GROUP BY {grouping_cols}) x
+				""".format(**locals()))[0]['max']
+			plpy.execute("DROP TABLE IF EXISTS {0}".format(distinct_grp_table))
+		else:
+			plpy.execute(
+				""" INSERT INTO {out_table}
+				SELECT {vertex_id} AS {vertex_id},
+					{init_w} AS {weight},
+					NULL AS parent
+				FROM {vertex_table}
+				WHERE {vertex_id} IS NOT NULL
+				 """.format(**locals()))
+
+			# The source can be reached with 0 cost and it has itself as the
+			# parent.
+			plpy.execute(
+				""" INSERT INTO {oldupdate}
+					VALUES({source_vertex},0,{source_vertex})
+				""".format(**locals()))
+
+			v_cnt = plpy.execute(
+				""" SELECT count(*) FROM {vertex_table}
+				WHERE {vertex_id} IS NOT NULL
+				""".format(**locals()))[0]['count']
 
-		v_cnt = plpy.execute(
-			"""SELECT count(*) FROM {vertex_table}
-			WHERE {vertex_id} IS NOT NULL""".format(**locals()))[0]['count']
 		for i in range(0,v_cnt+1):
 
-			# Apply the updates calculated in the last iteration
-			sql = m4_ifdef(<!__HAWQ__!>,
-				<!"""
+			# Apply the updates calculated in the last iteration.
+			if is_hawq:
+				sql = """
 				TRUNCATE TABLE {temp_table};
 				INSERT INTO {temp_table}
 					SELECT *
 					FROM {out_table}
-					WHERE {out_table}.{vertex_id} NOT IN (
-						SELECT {oldupdate}.id FROM {oldupdate})
+					WHERE NOT EXISTS (
+						SELECT 1
+						FROM {oldupdate} as oldupdate
+						WHERE {out_table}.{vertex_id} = oldupdate.id
+						{checkg_oo})
 					UNION
-					SELECT * FROM {oldupdate};
+					SELECT {grp_comma} id, {weight}, parent FROM {oldupdate};
 				DROP TABLE {out_table};
 				ALTER TABLE {temp_table} RENAME TO {out_table};
-				CREATE TABLE {temp_table} (
-					{vertex_id} INT, {weight} DOUBLE PRECISION, parent INT)
-					{distribution};
-				"""!>,
-				<!"""
+				CREATE TABLE {temp_table} AS (
+					SELECT * FROM {out_table} LIMIT 0)
+					{distribution};"""
+				plpy.execute(sql.format(**locals()))
+				ret = plpy.execute("SELECT id FROM {0} LIMIT 1".
+					format(oldupdate))
+			else:
+				sql = """
 				UPDATE {out_table} SET
-				{weight}=oldupdate.val,
+				{weight}=oldupdate.{weight},
 				parent=oldupdate.parent
 				FROM
 				{oldupdate} AS oldupdate
 				WHERE
-				{out_table}.{vertex_id}=oldupdate.id
-				"""!>)
-			plpy.execute(sql.format(**locals()))
+				{out_table}.{vertex_id}=oldupdate.id AND
+				{out_table}.{weight}>oldupdate.{weight} {checkg_oo}
+				"""
+				ret = plpy.execute(sql.format(**locals()))
 
+			if ret.nrows() == 0:
+				break
 
 			plpy.execute("TRUNCATE TABLE {0}".format(newupdate))
 
@@ -194,105 +355,237 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table,
 			# for comparison.
 
 			# Once we have a list of edges and values (stores as 'message'),
-			# we check if these values are lower than the existing shortest path
-			# values.
+			# we check if these values are lower than the existing shortest
+			# path values.
 
 			sql = (""" INSERT INTO {newupdate}
-				SELECT DISTINCT ON (message.id) message.id AS id,
-					message.val AS val,
-					message.parent AS parent
+				SELECT DISTINCT ON (message.id {comma_grp})
+					message.id AS id,
+					message.{weight} AS {weight},
+					message.parent AS parent {comma_grp_m}
 				FROM {out_table} AS out_table INNER JOIN
 					(
-						SELECT edge_table.{dest} AS id, x.val AS val,
-							oldupdate.id AS parent
+					SELECT {edge_table}.{dest} AS id, x.{weight} AS {weight},
+						oldupdate.id AS parent {comma_grp_e}
+					FROM {oldupdate} AS oldupdate INNER JOIN
+						{edge_table}  ON
+							({edge_table}.{src} = oldupdate.id {checkg_eo})
+						INNER JOIN
+						(
+						SELECT {edge_table}.{dest} AS id,
+							min(oldupdate.{weight} +
+								{edge_table}.{weight}) AS {weight} {comma_grp_e}
 						FROM {oldupdate} AS oldupdate INNER JOIN
-							{edge_table} AS edge_table ON
-							(edge_table.{src} = oldupdate.id) INNER JOIN
-							(
-								SELECT edge_table.{dest} AS id,
-									min(oldupdate.val + edge_table.{weight})
-									AS val
-								FROM {oldupdate} AS oldupdate INNER JOIN
-									{edge_table} AS edge_table ON
-									(edge_table.{src}=oldupdate.id)
-								GROUP BY edge_table.{dest}
-							) x ON (edge_table.{dest} = x.id)
-						WHERE ABS(oldupdate.val + edge_table.{weight} - x.val)
-							< {EPSILON}
-					) AS message ON (message.id = out_table.{vertex_id})
-				WHERE message.val<out_table.{weight}
+							{edge_table}  ON
+							({edge_table}.{src}=oldupdate.id {checkg_eo})
+						GROUP BY {edge_table}.{dest} {comma_grp_e}
+						) x
+						ON ({edge_table}.{dest} = x.id {checkg_ex} )
+					WHERE ABS(oldupdate.{weight} + {edge_table}.{weight}
+								- x.{weight}) < {EPSILON}
+					) message
+					ON (message.id = out_table.{vertex_id} {checkg_om})
+				WHERE message.{weight}<out_table.{weight}
 				""".format(**locals()))
 
-			# If there are no updates, SSSP is finalized
-			ret = plpy.execute(sql)
-			if ret.nrows() == 0:
-				break
+			plpy.execute(sql)
 
-			# Swap the update tables for the next iteration
+			# Swap the update tables for the next iteration.
 			tmp = oldupdate
 			oldupdate = newupdate
 			newupdate = tmp
 
-		# Bellman-Ford should converge in |V|-1 iterations.
+		plpy.execute("DROP TABLE IF EXISTS {0}".format(newupdate))
+		# The algorithm should converge in less than |V| iterations.
+		# Otherwise there is a negative cycle in the graph.
 		if i == v_cnt:
-			plpy.execute("DROP TABLE IF EXISTS {out_table}".format(**locals()))
-			plpy.error("Graph SSSP: Detected a negative cycle in the graph.")
-
-		m4_ifdef(<!__HAWQ__!>,
-			plpy.execute("DROP TABLE {temp_table} ".format(**locals())), <!''!>)
+			if grouping_cols is None:
+				plpy.execute("DROP TABLE IF EXISTS {0},{1},{2}".
+					format(out_table, out_table+"_summary", oldupdate))
+				if is_hawq:
+					plpy.execute("DROP TABLE IF EXISTS {0}".format(temp_table))
+				plpy.error("Graph SSSP: Detected a negative cycle in the graph.")
+
+			# It is possible that not all groups has negative cycles.
+			else:
+
+				# gsql is the string created by collating grouping columns.
+				# By looking at the oldupdate table we can see which groups
+				# are in a negative cycle.
+
+				negs = plpy.execute(
+					""" SELECT array_agg(DISTINCT ({grouping_cols})) AS grp
+						FROM {oldupdate}
+					""".format(**locals()))[0]['grp']
+
+				# Delete the groups with negative cycles from the output table.
+				sql_del = """ DELETE FROM {out_table}
+					USING {oldupdate} AS oldupdate
+					WHERE {checkg_oo_sub}"""
+				if is_hawq:
+					sql_del = """
+						TRUNCATE TABLE {temp_table};
+						INSERT INTO {temp_table}
+							SELECT *
+							FROM {out_table}
+							WHERE NOT EXISTS(
+								SELECT 1
+								FROM {oldupdate} as oldupdate
+								WHERE {checkg_oo_sub}
+								);
+						DROP TABLE {out_table};
+						ALTER TABLE {temp_table} RENAME TO {out_table};"""
+
+				plpy.execute(sql_del.format(**locals()))
+
+				# If every group has a negative cycle,
+				# drop the output table as well.
+				if table_is_empty(out_table):
+					plpy.execute("DROP TABLE IF EXISTS {0},{1}".
+						format(out_table,out_table+"_summary"))
+
+				plpy.warning(
+					"""Graph SSSP: Detected a negative cycle in the """ +
+					"""sub-graphs of following groups: {0}.""".
+					format(str(negs)[1:-1]))
+
+		plpy.execute("DROP TABLE IF EXISTS {0}".format(oldupdate))
+		if is_hawq:
+			plpy.execute("DROP TABLE IF EXISTS {temp_table} ".
+				format(**locals()))
 
 	return None
 
-def graph_sssp_get_path(schema_madlib, sssp_table, dest_vertex, **kwargs):
+def graph_sssp_get_path(schema_madlib, sssp_table, dest_vertex, path_table,
+	**kwargs):
 	"""
-	Helper function that can be used to get the shortest path for a vertex
+    Helper function that can be used to get the shortest path for a vertex
     Args:
-    	@param source_table	Name of the table that contains the SSSP output.
-        @param out_table	The vertex that will be the destination of the
-            				desired path.
+        @param sssp_table   Name of the table that contains the SSSP output.
+        @param dest_vertex  The vertex that will be the destination of the
+                            desired path.
+        @param path_table   Name of the output table that contains the path.
 	"""
+	with MinWarning("warning"):
+		_validate_get_path(sssp_table, dest_vertex, path_table)
 
-	validate_get_path(sssp_table, dest_vertex)
-	cur = dest_vertex
-	cols = get_cols(sssp_table)
-	id = cols[0]
-	ret = [dest_vertex]
-	plan_name = unique_string(desp='plan')
-
-	# Follow the 'parent' chain until you reach the source.
-	# We don't need to know what the source is since it is the only vertex with
-	# itself as its parent
-	plpy.execute(""" PREPARE {plan_name} (int) AS
-		SELECT parent FROM {sssp_table} WHERE {id} = $1 LIMIT 1
-		""".format(**locals()))
-	sql = "EXECUTE {plan_name} ({cur})"
-	parent = plpy.execute(sql.format(**locals()))
+		temp1_name = unique_string(desp='temp1')
+		temp2_name = unique_string(desp='temp2')
 
-	if parent.nrows() == 0:
-		plpy.error(
-			"Graph SSSP: Vertex {0} is not present in the sssp table {1}".
-			format(dest_vertex,sssp_table))
-
-	while 1:
-		parent = parent[0]['parent']
-		if parent == cur:
-			ret.reverse()
-			return ret
-		else:
-			ret.append(parent)
-			cur = parent
-		parent = plpy.execute(sql.format(**locals()))
+		select_grps = ""
+		check_grps_t1 = ""
+		check_grps_t2 = ""
+		check_grps_pt1 = ""
+		check_grps_pt2 = ""
+		checkg_po = ""
+		grp_comma = ""
+		tmp = ""
+
+		summary = plpy.execute("SELECT * FROM {0}_summary".format(sssp_table))
+		vertex_id = summary[0]['vertex_id']
+		source_vertex = summary[0]['source_vertex']
+
+		if vertex_id == "NULL":
+			vertex_id = "id"
+
+		grouping_cols = summary[0]['grouping_cols']
+		if grouping_cols == "NULL":
+			grouping_cols = None
+
+		if grouping_cols is not None:
+			glist = split_quoted_delimited_str(grouping_cols)
+			select_grps = _grp_from_table(sssp_table,glist) + " , "
+			check_grps_t1 = " AND " + _check_groups(
+				sssp_table,temp1_name,glist)
+			check_grps_t2 = " AND " + _check_groups(
+				sssp_table,temp2_name,glist)
+
+			checkg_po = " WHERE " + _check_groups(
+				path_table,"oldupdate",glist)
+			grp_comma = grouping_cols + " , "
+
+		if source_vertex == dest_vertex:
+			plpy.execute("""
+				CREATE TABLE {path_table} AS
+				SELECT {grp_comma} '{{{dest_vertex}}}'::INT[] AS path
+				FROM {sssp_table} WHERE {vertex_id} = {dest_vertex}
+				""".format(**locals()))
+			return
+
+		plpy.execute( "DROP TABLE IF EXISTS {0},{1}".
+			format(temp1_name,temp2_name));
+		out = plpy.execute(""" CREATE TEMP TABLE {temp1_name} AS
+				SELECT {grp_comma} {sssp_table}.parent AS {vertex_id},
+					ARRAY[{dest_vertex}] AS path
+				FROM {sssp_table}
+				WHERE {vertex_id} = {dest_vertex}
+					AND {sssp_table}.parent IS NOT NULL
+			""".format(**locals()))
+
+		plpy.execute("""
+			CREATE TEMP TABLE {temp2_name} AS
+				SELECT * FROM {temp1_name} LIMIT 0
+			""".format(**locals()))
+
+		# Follow the 'parent' chain until you reach the source.
+		while out.nrows() > 0:
+
+			plpy.execute("TRUNCATE TABLE {temp2_name}".format(**locals()))
+			# If the vertex id is not the source vertex,
+			# Add it to the path and move to its parent
+			out = plpy.execute(
+				""" INSERT INTO {temp2_name}
+				SELECT {select_grps} {sssp_table}.parent AS {vertex_id},
+					{sssp_table}.{vertex_id} || {temp1_name}.path AS path
+				FROM {sssp_table} INNER JOIN {temp1_name} ON
+					({sssp_table}.{vertex_id} = {temp1_name}.{vertex_id}
+						{check_grps_t1})
+				WHERE {source_vertex} <> {sssp_table}.{vertex_id}
+				""".format(**locals()))
+
+			tmp = temp2_name
+			temp2_name = temp1_name
+			temp1_name = tmp
+
+			tmp = check_grps_t1
+			check_grps_t1 = check_grps_t2
+			check_grps_t2 = tmp
+
+		# Add the source vertex to the beginning of every path and
+		# add the empty arrays for the groups that don't have a path to reach
+		# the destination vertex
+		plpy.execute("""
+			CREATE TABLE {path_table} AS
+			SELECT {grp_comma} {source_vertex} || path AS path
+			FROM {temp2_name}
+			UNION
+			SELECT {grp_comma} '{{}}'::INT[] AS path
+			FROM {sssp_table}
+			WHERE {vertex_id} = {dest_vertex}
+				AND {sssp_table}.parent IS NULL
+			""".format(**locals()))
+
+		out = plpy.execute("SELECT 1 FROM {0} LIMIT 1".format(path_table))
+
+		if out.nrows() == 0:
+			plpy.error(
+				"Graph SSSP: Vertex {0} is not present in the SSSP table {1}".
+				format(dest_vertex,sssp_table))
+
+		plpy.execute("DROP TABLE IF EXISTS {temp1_name}, {temp1_name}".
+			format(**locals()))
 
 	return None
 
-def validate_sssp(vertex_table, vertex_id, edge_table, edge_params,
-	source_vertex, out_table, **kwargs):
+
+def _validate_sssp(vertex_table, vertex_id, edge_table, edge_params,
+	source_vertex, out_table, glist, **kwargs):
 
 	validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
 		out_table,'SSSP')
 
 	_assert(isinstance(source_vertex,int),
-		"""Graph SSSP: Source vertex {source_vertex} has to be an integer """.
+		"""Graph SSSP: Source vertex {source_vertex} has to be an integer.""".
 		format(**locals()))
 	src_exists = plpy.execute("""
 		SELECT * FROM {vertex_table} WHERE {vertex_id}={source_vertex}
@@ -300,8 +593,8 @@ def validate_sssp(vertex_table, vertex_id, edge_table, edge_params,
 
 	if src_exists.nrows() == 0:
 		plpy.error(
-			"""Graph SSSP: Source vertex {source_vertex} is not present in the
-			vertex table {vertex_table} """.format(**locals()))
+			"""Graph SSSP: Source vertex {source_vertex} is not present in the vertex table {vertex_table}.""".
+			format(**locals()))
 
 	vt_error = plpy.execute(
 		""" SELECT {vertex_id}
@@ -312,12 +605,20 @@ def validate_sssp(vertex_table, vertex_id, edge_table, edge_params,
 
 	if vt_error.nrows() != 0:
 		plpy.error(
-			"""Graph SSSP: Source vertex table {vertex_table}
-			contains duplicate vertex id's """.format(**locals()))
+			"""Graph SSSP: Source vertex table {vertex_table} contains duplicate vertex id's.""".
+			format(**locals()))
+
+	_assert(not table_exists(out_table+"_summary"),
+		"Graph SSSP: Output summary table already exists!")
+
+	if glist is not None:
+		_assert(columns_exist_in_table(edge_table, glist),
+			"""Graph SSSP: Not all columns from {glist} are present in edge table ({edge_table}).""".
+			format(**locals()))
 
 	return None
 
-def validate_get_path(sssp_table, dest_vertex, **kwargs):
+def _validate_get_path(sssp_table, dest_vertex, path_table, **kwargs):
 
 	_assert(sssp_table and sssp_table.strip().lower() not in ('null', ''),
 		"Graph SSSP: Invalid SSSP table name!")
@@ -326,21 +627,31 @@ def validate_get_path(sssp_table, dest_vertex, **kwargs):
 	_assert(not table_is_empty(sssp_table),
 		"Graph SSSP: SSSP table ({0}) is empty!".format(sssp_table))
 
+	summary = sssp_table+"_summary"
+	_assert(table_exists(summary),
+		"Graph SSSP: SSSP summary table ({0}) is missing!".format(summary))
+	_assert(not table_is_empty(summary),
+		"Graph SSSP: SSSP summary table ({0}) is empty!".format(summary))
+
+	_assert(not table_exists(path_table),
+		"Graph SSSP: Output path table already exists!")
+
+	return None
 
 def graph_sssp_help(schema_madlib, message, **kwargs):
-    """
-    Help function for graph_sssp and graph_sssp_get_path
+	"""
+	Help function for graph_sssp and graph_sssp_get_path
 
-    Args:
-        @param schema_madlib
-        @param message: string, Help message string
-        @param kwargs
+	Args:
+		@param schema_madlib
+		@param message: string, Help message string
+		@param kwargs
 
-    Returns:
-        String. Help/usage information
-    """
-    if not message:
-        help_string = """
+	Returns:
+	    String. Help/usage information
+	"""
+	if not message:
+		help_string = """
 -----------------------------------------------------------------------
                             SUMMARY
 -----------------------------------------------------------------------
@@ -352,41 +663,120 @@ weights of its constituent edges is minimized.
 For more details on function usage:
     SELECT {schema_madlib}.graph_sssp('usage')
             """
-    elif message in ['usage', 'help', '?']:
-        help_string = """
+	elif message.lower() in ['usage', 'help', '?']:
+		help_string = """
 {graph_usage}
 
 To retrieve the path for a specific vertex:
 
  SELECT {schema_madlib}.graph_sssp_get_path(
     sssp_table	TEXT, -- Name of the table that contains the SSSP output.
-    dest_vertex	INT   -- The vertex that will be the destination of the
-    		  -- desired path.
+    dest_vertex	INT,  -- The vertex that will be the destination of the
+                      -- desired path.
+    path_table  TEXT  -- Name of the output table that contains the path.
 );
 
 ----------------------------------------------------------------------------
                             OUTPUT
 ----------------------------------------------------------------------------
-The output table ('out_table' above) will contain a row for every vertex from
-vertex_table and have the following columns:
-
-vertex_id 	: The id for the destination. Will use the input parameter
-		(vertex_id) for column naming.
-weight 		: The total weight of the shortest path from the source vertex
-		to this particular vertex. Will use the input parameter (weight)
-		for column naming.
-parent 		: The parent of this vertex in the shortest path from source.
-		Will use "parent" for column naming.
-
-The graph_sssp_get_path function will return an INT array that contains the
-shortest path from the initial source vertex to the desired destination vertex.
+The output of SSSP ('out_table' above) contains a row for every vertex of
+every group and have the following columns (in addition to the grouping
+columns):
+  - vertex_id : The id for the destination. Will use the input parameter
+                'vertex_id' for column naming.
+  - weight    : The total weight of the shortest path from the source vertex
+              to this particular vertex.
+              Will use the input parameter 'weight' for column naming.
+  - parent    : The parent of this vertex in the shortest path from source.
+              Will use 'parent' for column naming.
+
+The output of graph_sssp_get_path ('path_table' above) contains a row for
+every group and has the following columns:
+  - grouping_cols : The grouping columns given in the creation of the SSSP
+                  table. If there are no grouping columns, these columns
+                  will not exist and the table will have a single row.
+  - path (ARRAY)  : The shortest path from the source vertex (as specified
+                  in the SSSP execution) to the destination vertex.
+"""
+	elif message.lower() in ("example", "examples"):
+		help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+-- Create a graph, represented as vertex and edge tables.
+DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER,
+        weight DOUBLE PRECISION
+);
+
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7)
+;
+INSERT INTO edge VALUES
+(0, 1, 1),
+(0, 2, 1),
+(0, 4, 10),
+(1, 2, 2),
+(1, 3, 10),
+(2, 3, 1),
+(2, 5, 1),
+(2, 6, 3),
+(3, 0, 1),
+(4, 0, -2),
+(5, 6, 1),
+(6, 7, 1)
+;
+
+-- Compute the SSSP:
+DROP TABLE IF EXISTS pagerank_out;
+SELECT madlib.graph_sssp(
+	'vertex',                            -- Vertex table
+	'id',                                -- Vertix id column
+	'edge',                              -- Edge table
+	'src=src, dest=dest, weight=weight', -- Comma delimted string of edge arguments
+	 0,                                  -- The source vertex
+	'out'                                -- Output table of SSSP
+);
+-- View the SSSP costs for every vertex:
+SELECT * FROM out ORDER BY id;
+
+-- View the actual shortest path for a vertex:
+SELECT graph_sssp_get_path('out',5,'out_path');
+SELECT * FROM out_path;
+
+-- Create a graph with 2 groups:
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+
+-- Find SSSP for all groups:
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT graph_sssp('vertex',NULL,'edge_gr',NULL,0,'out_gr','grp');
 """
-    else:
-        help_string = "No such option. Use {schema_madlib}.graph_sssp()"
+	else:
+		help_string = "No such option. Use {schema_madlib}.graph_sssp()"
 
-    return help_string.format(schema_madlib=schema_madlib,
-    	graph_usage=get_graph_usage(schema_madlib, 'graph_sssp',
+	return help_string.format(schema_madlib=schema_madlib,
+		graph_usage=get_graph_usage(schema_madlib, 'graph_sssp',
     """source_vertex INT,  -- The source vertex id for the algorithm to start.
-    out_table     TEXT  -- Name of the table to store the result of SSSP."""))
+    out_table     TEXT, -- Name of the table to store the result of SSSP.
+    grouping_cols TEXT  -- The list of grouping columns."""))
 # ---------------------------------------------------------------------
-

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/graph/sssp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/sssp.sql_in b/src/ports/postgres/modules/graph/sssp.sql_in
index 7f89823..be433dc 100644
--- a/src/ports/postgres/modules/graph/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/sssp.sql_in
@@ -55,7 +55,8 @@ graph_sssp( vertex_table,
             edge_table,
             edge_args,
             source_vertex,
-            out_table
+            out_table,
+            grouping_cols
           )
 </pre>
 
@@ -89,12 +90,18 @@ exist in the 'vertex_id' column of 'vertex_table'.</dd>
 
 <dt>out_table</dt>
 <dd>TEXT. Name of the table to store the result of SSSP.
-It will contain a row for every vertex from 'vertex_table' and have
-the following columns:
+It contains a row for every vertex of every group and have
+the following columns (in addition to the grouping columns):
   - vertex_id : The id for the destination. Will use the input parameter 'vertex_id' for column naming.
   - weight : The total weight of the shortest path from the source vertex to this particular vertex.
-  Will use the input parameter (weight) for column naming.
-  - parent : The parent of this vertex in the shortest path from source. Will use 'parent' for column naming.</dd>
+  Will use the input parameter 'weight' for column naming.
+  - parent : The parent of this vertex in the shortest path from source. Will use 'parent' for column naming.
+
+A summary table named <out_table>_summary is also created. This is an internal table that keeps a record of the input parameters and is used by the path function described below.
+</dd>
+
+<dt>grouping_cols</dt>
+<dd>TEXT, default = NULL. List of columns used to group the input into discrete subgraphs. These columns must exist in the edge table. When this value is null, no grouping is used and a single SSSP result is generated. </dd>
 </dl>
 
 @par Path Retrieval
@@ -103,9 +110,10 @@ The path retrieval function returns the shortest path from the
 source vertex to a specified desination vertex.
 
 <pre class="syntax">
-graph_sssp( sssp_table,
-            dest_vertex
-          )
+graph_sssp_get_path( sssp_table,
+                     dest_vertex,
+                     path_table
+                    )
 </pre>
 
 \b Arguments
@@ -115,6 +123,14 @@ graph_sssp( sssp_table,
 
 <dt>dest_vertex</dt>
 <dd>INTEGER. The vertex that will be the destination of the desired path.</dd>
+
+<dt>path_table</dt>
+<dd>TEXT. Name of the output table that contains the path.
+It contains a row for every group and has the following columns:
+  - grouping_cols : The grouping columns given in the creation of the SSSP table. If there are no grouping columns, these columns will not exist and the table will have a single row.
+  - path (ARRAY) : The shortest path from the source vertex (as specified in the SSSP execution) to the destination vertex.
+</dd>
+
 </dl>
 
 @anchor notes
@@ -167,7 +183,7 @@ INSERT INTO edge VALUES
 
 -# Calculate the shortest paths from vertex 0:
 <pre class="syntax">
-DROP TABLE IF EXISTS out;
+DROP TABLE IF EXISTS out, out_summary;
 SELECT madlib.graph_sssp(
                          'vertex',      -- Vertex table
                          NULL,          -- Vertix id column (NULL means use default naming)
@@ -191,14 +207,16 @@ SELECT * FROM out ORDER BY id;
 (8 rows)
 </pre>
 
--# Get the shortest path to vertex 6:
+-# Get the shortest path to vertex 5:
 <pre class="syntax">
-SELECT madlib.graph_sssp_get_path('out',6) AS spath;
+DROP TABLE IF EXISTS out_path;
+SELECT madlib.graph_sssp_get_path('out',5,'out_path');
+SELECT * FROM out_path;
 </pre>
 <pre class="result">
-   spath
-\-----------
- {0,2,5,6}
+  path
+\---------
+ {0,2,5}
 </pre>
 
 -# Now let's do a similar example except using
@@ -212,10 +230,10 @@ CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM edge
 
 -# Get the shortest path from vertex 1:
 <pre class="syntax">
-DROP TABLE IF EXISTS out_alt;
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
 SELECT madlib.graph_sssp(
                          'vertex_alt',                  -- Vertex table
-                         'v_id',                        -- Vertix id column (NULL means use default naming)
+                         'v_id',                        -- Vertex id column (NULL means use default naming)
                          'edge_alt',                    -- Edge table
                          'src=e_src, weight=e_weight',  -- Edge arguments (NULL means use default naming)
                          1,                             -- Source vertex for path calculation
@@ -236,6 +254,65 @@ SELECT * FROM out_alt ORDER BY v_id;
 (8 rows)
 </pre>
 
+-# Create a graph with 2 groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre>
+
+-# Find SSSP for all groups
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_sssp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use default naming)
+                         'edge_gr',     -- Edge table
+                         NULL,          -- Edge arguments (NULL means use default naming)
+                         0,             -- Source vertex for path calculation
+                         'out_gr',      -- Output table of shortest paths
+                         'grp'          -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY grp,id;
+</pre>
+<pre class="result">
+ grp | id | weight | parent
+-----+----+--------+--------
+   0 |  0 |      0 |      0
+   0 |  1 |      1 |      0
+   0 |  2 |      1 |      0
+   0 |  3 |      2 |      2
+   0 |  4 |     10 |      0
+   0 |  5 |      2 |      2
+   0 |  6 |      3 |      5
+   0 |  7 |      4 |      6
+   1 |  0 |      0 |      0
+   1 |  1 |      1 |      0
+   1 |  2 |      1 |      0
+   1 |  3 |      2 |      2
+   1 |  4 |     10 |      0
+   1 |  5 |    -10 |      4
+</pre>
+
+-# Find the path to vertex 5 in every group
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr_path;
+SELECT madlib.graph_sssp_get_path('out_gr',5,'out_gr_path');
+SELECT * FROM out_gr_path ORDER BY grp;
+</pre>
+<pre class="result">
+ grp |  path
+-----+---------
+   0 | {0,2,5}
+   1 | {0,4,5}
+</pre>
+
 @anchor literature
 @par Literature
 
@@ -253,21 +330,36 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp(
     edge_table              TEXT,
     edge_args               TEXT,
     source_vertex           INT,
-    out_table               TEXT
+    out_table               TEXT,
+    grouping_cols           TEXT
 
 ) RETURNS VOID AS $$
     PythonFunction(graph, sssp, graph_sssp)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 -------------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    source_vertex           INT,
+    out_table               TEXT
+
+) RETURNS VOID AS $$
+     SELECT MADLIB_SCHEMA.graph_sssp($1, $2, $3, $4, $5, $6, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_sssp_get_path(
     sssp_table             TEXT,
-    dest_vertex            INT
+    dest_vertex            INT,
+    path_table             TEXT
 
-) RETURNS INT[] AS $$
+) RETURNS VOID AS $$
     PythonFunction(graph, sssp, graph_sssp_get_path)
 $$ LANGUAGE plpythonu VOLATILE
-m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 -------------------------------------------------------------------------
 
 -- Online help

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/graph/test/sssp.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in
index e2342c5..c3545c2 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -20,7 +20,10 @@
  *//* ----------------------------------------------------------------------- */
 
 
-DROP TABLE IF EXISTS vertex,edge,out,vertex_alt,edge_alt,out_alt;
+DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path,
+	vertex_alt,edge_alt,out_alt,out_alot_summary,
+	edge_gr,out_gr,out_gr_summary,out_gr_path,
+	edge_gr2, out_gr2, out_gr2_summary;
 
 
 CREATE TABLE vertex(
@@ -30,7 +33,7 @@ CREATE TABLE vertex(
 CREATE TABLE edge(
                   src INTEGER,
                   dest INTEGER,
-                  weight INTEGER
+                  weight DOUBLE PRECISION
                 );
 
 INSERT INTO vertex VALUES
@@ -62,17 +65,69 @@ SELECT graph_sssp('vertex',NULL,'edge',NULL,0,'out');
 
 SELECT * FROM out;
 
-SELECT assert(weight = 3, 'Wrong output in graph (SSSP)') FROM out WHERE id = 6;
-SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)') FROM out WHERE id = 6;
+SELECT assert(weight = 3, 'Wrong output in graph (SSSP)')
+	FROM out WHERE id = 6;
+SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)')
+	FROM out WHERE id = 6;
 
-SELECT graph_sssp_get_path('out',6);
+SELECT graph_sssp_get_path('out',6,'out_path');
 
-CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
-CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight FROM edge;
+CREATE TABLE vertex_alt AS SELECT id AS v_id
+	FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS e_src, dest, weight AS e_weight
+	FROM edge;
 
-SELECT graph_sssp('vertex_alt','v_id','edge_alt','src=e_src, weight=e_weight',1,'out_alt');
+SELECT graph_sssp('vertex_alt','v_id','edge_alt','src=e_src, weight=e_weight'
+	,1,'out_alt');
 
 SELECT * FROM out_alt;
 
-SELECT assert(e_weight = 4, 'Wrong output in graph (SSSP)') FROM out_alt WHERE v_id = 6;
-SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)') FROM out_alt WHERE v_id = 6;
+SELECT assert(e_weight = 4, 'Wrong output in graph (SSSP)')
+	FROM out_alt WHERE v_id = 6;
+SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)')
+	FROM out_alt WHERE v_id = 6;
+
+CREATE TABLE edge_gr AS
+( 	SELECT *, 0 AS grp FROM edge
+	UNION
+	SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
+	UNION
+	SELECT *, 2 AS grp FROM edge WHERE src < 6 AND dest < 6
+);
+
+INSERT INTO edge_gr VALUES
+(7,NULL,NULL,1),
+(4,0,-20,2);
+
+SELECT graph_sssp('vertex',NULL,'edge_gr',NULL,0,'out_gr','grp');
+
+SELECT assert(weight = 3, 'Wrong output in graph (SSSP)')
+	FROM out_gr WHERE id = 6 AND grp = 0;
+SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)')
+	FROM out_gr WHERE id = 6 AND grp = 0;
+
+SELECT assert(weight = 2, 'Wrong output in graph (SSSP)')
+	FROM out_gr WHERE id = 5 AND grp = 1;
+SELECT assert(parent = 2, 'Wrong parent in graph (SSSP)')
+	FROM out_gr WHERE id = 5 AND grp = 1;
+
+SELECT assert(weight = 'Infinity', 'Wrong output in graph (SSSP)')
+	FROM out_gr WHERE id = 7 AND grp = 1;
+
+SELECT graph_sssp_get_path('out_gr',5,'out_gr_path');
+
+CREATE TABLE edge_gr2 AS
+( 	SELECT *, 0 AS grp1, 0 AS grp2 FROM edge
+	UNION
+	SELECT *, 1 AS grp1, 0 AS grp2 FROM edge WHERE src < 6 AND dest < 6
+	UNION
+	SELECT *, 1 AS grp1, 1 AS grp2 FROM edge WHERE src < 6 AND dest < 6
+);
+
+SELECT graph_sssp('vertex',NULL,'edge_gr2',NULL,0,'out_gr2','grp1,grp2');
+
+
+SELECT assert(weight = 3, 'Wrong output in graph (SSSP)')
+	FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0;
+SELECT assert(parent = 5, 'Wrong parent in graph (SSSP)')
+	FROM out_gr2 WHERE id = 6 AND grp1 = 0 AND grp2 = 0;

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/pca/pca.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/pca/pca.py_in b/src/ports/postgres/modules/pca/pca.py_in
index 196c558..680e9f6 100644
--- a/src/ports/postgres/modules/pca/pca.py_in
+++ b/src/ports/postgres/modules/pca/pca.py_in
@@ -144,16 +144,16 @@ def pca_wrap(schema_madlib, source_table, pc_table, row_id,
         )
         """.format(pc_table=pc_table, grouping_cols_clause=grouping_cols_clause))
     pc_table_mean = add_postfix(pc_table, "_mean")
+    plpy.execute("DROP TABLE IF EXISTS {0}".format(pc_table_mean))
     plpy.execute("""
-        DROP TABLE IF EXISTS {pc_table_mean};
         CREATE TABLE {pc_table_mean} (
             column_mean     double precision[]
             {grouping_cols_clause}
         )
         """.format(pc_table_mean=pc_table_mean, grouping_cols_clause=grouping_cols_clause))
     if result_summary_table:
+        plpy.execute("DROP TABLE IF EXISTS {0}".format(result_summary_table))
         plpy.execute("""
-                DROP TABLE IF EXISTS {0};
                 CREATE TABLE {0} (
                 rows_used               INTEGER,
                 "exec_time (ms)"        numeric,
@@ -947,7 +947,7 @@ SELECT {schema_madlib}.pca_train( 'mat',
           'id',
           3
     );
-    
+
 SELECT * FROM result_table ORDER BY row_id;
 
 DROP TABLE IF EXISTS mat_group;

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/stats/test/chi2_test.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/test/chi2_test.sql_in b/src/ports/postgres/modules/stats/test/chi2_test.sql_in
index c49d996..62648a0 100644
--- a/src/ports/postgres/modules/stats/test/chi2_test.sql_in
+++ b/src/ports/postgres/modules/stats/test/chi2_test.sql_in
@@ -58,7 +58,7 @@ CREATE TABLE chi2_independence_est_1 AS
 SELECT (chi2_gof_test(observed, expected, deg_freedom)).*
 FROM (
     SELECT
-        observed,
+        id_x,id_y,observed,
         sum(observed) OVER (PARTITION BY id_x)::DOUBLE PRECISION
             * sum(observed) OVER (PARTITION BY id_y) AS expected
     FROM chi2_test_friendly_unpivoted

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8faf6226/src/ports/postgres/modules/validation/internal/cross_validation.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/validation/internal/cross_validation.py_in b/src/ports/postgres/modules/validation/internal/cross_validation.py_in
index c1b2561..11cde2f 100644
--- a/src/ports/postgres/modules/validation/internal/cross_validation.py_in
+++ b/src/ports/postgres/modules/validation/internal/cross_validation.py_in
@@ -200,9 +200,9 @@ def _cv_copy_data(rel_origin, dependent_varname,
     """
     """
     target_col, features_col = 'y', 'x'
+    plpy.execute("drop table if exists {0}".format(rel_copied))
     plpy.execute("""
         select setseed(0.5);
-        drop table if exists {rel_copied};
         create temp table {rel_copied} as
             select
                 row_number() over (order by random()) as {random_id},
@@ -233,15 +233,15 @@ def _cv_split_data(rel_source, col_data, col_id, row_num,
     # which corresponds to rows outside of [start_row, end_row).
     # Extract the validation part of data,
     # which corresponds to rows inside of [start_row, end_row).
+    plpy.execute("drop view if exists {rel_train}".format(**kwargs))
     plpy.execute("""
-        drop view if exists {rel_train};
         create temp view {rel_train} as
             select {col_id}, {col_string} from {rel_source}
             where {col_id} < {start_row}
                  or {col_id} >= {end_row}
         """.format(**kwargs))
+    plpy.execute("drop view if exists {rel_valid}".format(**kwargs))
     plpy.execute("""
-        drop view if exists {rel_valid};
         create temp view {rel_valid} as
             select {col_id}, {col_string} from {rel_source}
             where {col_id} >= {start_row}