You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/08/29 20:41:53 UTC
[16/50] [abbrv] incubator-madlib git commit: Graph: Add Breadth-first
Search algorithm with grouping support.
Graph: Add Breadth-first Search algorithm with grouping support.
JIRA: MADLIB-1102
This algorithm searches or traverses connected nodes in a graph in breadth-first order
starting at a user-specified origin node. Grouping support is also included.
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/8c9b955c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/8c9b955c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/8c9b955c
Branch: refs/heads/latest_release
Commit: 8c9b955cd2e3150ad935ad1581e164670723184f
Parents: 492fc7e
Author: Rashmi Raghu <rr...@pivotal.io>
Authored: Wed Jul 12 15:15:21 2017 -0700
Committer: Rashmi Raghu <rr...@pivotal.io>
Committed: Wed Jul 12 15:24:16 2017 -0700
----------------------------------------------------------------------
doc/mainpage.dox.in | 3 +
src/ports/postgres/modules/graph/bfs.py_in | 477 +++++++++++++++++++
src/ports/postgres/modules/graph/bfs.sql_in | 471 ++++++++++++++++++
.../postgres/modules/graph/graph_utils.py_in | 11 +
.../postgres/modules/graph/test/bfs.sql_in | 278 +++++++++++
5 files changed, 1240 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index e16f9b2..52afd48 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -129,6 +129,9 @@ complete matrix stored as a distributed table.
@defgroup grp_apsp All Pairs Shortest Path
@ingroup grp_graph
+ @defgroup grp_bfs Breadth-first Search
+ @ingroup grp_graph
+
@defgroup grp_pagerank PageRank
@ingroup grp_graph
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/bfs.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in
new file mode 100644
index 0000000..a5631ea
--- /dev/null
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -0,0 +1,477 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Breadth-First Search
+
+# Please refer to the bfs.sql_in file for the documentation
+
+"""
+@file bfs.py_in
+
+@namespace graph
+"""
+
+import plpy
+from graph_utils import validate_graph_coding
+from graph_utils import get_graph_usage
+from graph_utils import _grp_null_checks
+from utilities.control import MinWarning
+from utilities.utilities import _assert
+from utilities.utilities import extract_keyvalue_params
+from utilities.utilities import split_quoted_delimited_str
+from utilities.validate_args import table_exists
+from utilities.validate_args import columns_exist_in_table
+
+m4_changequote(`<!', `!>')
+
+def _validate_bfs(vertex_table, vertex_id, edge_table, edge_params,
+ source_vertex, out_table, max_distance, directed, grouping_cols_list, **kwargs):
+
+ validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
+ out_table,'BFS')
+
+ _assert((max_distance >= 0) and isinstance(max_distance,int),
+ """Graph BFS: Invalid max_distance type or value ({0}), must be integer,
+ be greater than or equal to 0 and be less than max allowable integer
+ (2147483647).""".
+ format(max_distance))
+
+ _assert(isinstance(directed,bool),
+ """Graph BFS: Invalid value for directed ({0}), must be boolean.""".
+ format(directed))
+
+ _assert(isinstance(source_vertex,int),
+ """Graph BFS: Source vertex {source_vertex} has to be an integer.""".
+ format(**locals()))
+ src_exists = plpy.execute("""
+ SELECT * FROM {vertex_table} WHERE {vertex_id}={source_vertex}
+ """.format(**locals()))
+ if src_exists.nrows() == 0:
+ plpy.error(
+ """Graph BFS: Source vertex {source_vertex} is not present in the
+ vertex table {vertex_table}.""".
+ format(**locals()))
+
+ vt_error = plpy.execute(
+ """ SELECT {vertex_id}
+ FROM {vertex_table}
+ WHERE {vertex_id} IS NOT NULL
+ GROUP BY {vertex_id}
+ HAVING count(*) > 1 """.format(**locals()))
+ if vt_error.nrows() != 0:
+ plpy.error(
+ """Graph BFS: Source vertex table {vertex_table} contains duplicate
+ vertex id's.""".
+ format(**locals()))
+
+ _assert(not table_exists(out_table+"_summary"),
+ "Graph BFS: Output summary table already exists!")
+
+ if grouping_cols_list is not None:
+ _assert(columns_exist_in_table(edge_table, grouping_cols_list),
+ """Graph BFS: Not all columns from {grouping_cols_list} are present
+ in edge table ({edge_table}).""".
+ format(**locals()))
+
+ return None
+
+
+def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table,
+ edge_args, source_vertex, out_table, max_distance, directed, grouping_cols,
+ **kwargs):
+
+ """
+ Breadth First Search algorithm for graphs [1].
+ Args:
+ @param vertex_table Name of the table that contains the vertex data.
+ @param vertex_id Name of the column containing the vertex ids.
+ @param edge_table Name of the table that contains the edge data.
+ @param edge_args A comma-delimited string containing multiple
+ named arguments of the form "name=value".
+ @param source_vertex The source vertex id for the algorithm to start.
+ @param out_table Name of the table to store the result of BFS.
+ @param max_distance Maximum distance from the source_vertex to search for.
+ @param directed Graph will be treated as directed if this boolean flag
+ is set to TRUE. Graph is treated as undirected by default.
+ @param grouping_cols The list of grouping columns.
+
+ [1] https://en.wikipedia.org/wiki/Breadth-first_search
+ """
+
+ with MinWarning("warning"):
+
+ INT_MAX = 2147483647
+
+ params_types = {'src': str, 'dest': str}
+ default_args = {'src': 'src', 'dest': 'dest'}
+ edge_params = extract_keyvalue_params(edge_args,
+ params_types,
+ default_args)
+
+ # Prepare the input for recording in the summary table
+ if vertex_id is None:
+ v_st= "NULL"
+ vertex_id = "id"
+ else:
+ v_st = vertex_id
+ if edge_args is None:
+ e_st = "NULL"
+ else:
+ e_st = edge_args
+ if max_distance is None:
+ d_st= "NULL"
+ max_distance = INT_MAX
+ else:
+ d_st = max_distance
+ if directed is None:
+ dir_st= "NULL"
+ directed = False
+ else:
+ dir_st = directed
+ if grouping_cols is None or grouping_cols is '':
+ g_st = "NULL"
+ glist = None
+ else:
+ g_st = grouping_cols
+ glist = split_quoted_delimited_str(grouping_cols)
+
+ src = edge_params["src"]
+ dest = edge_params["dest"]
+
+ distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+ <!"DISTRIBUTED BY ({0})".format(vertex_id)!>)
+ local_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+ <!"DISTRIBUTED RANDOMLY"!>)
+
+ _validate_bfs(vertex_table, vertex_id, edge_table,
+ edge_params, source_vertex, out_table, max_distance, directed, glist)
+
+ # Initialize grouping related variables
+ grp_comma = ""
+ and_grp_null_checks = ""
+
+ if grouping_cols is not None and grouping_cols is not '':
+ grp_comma = grouping_cols + ", "
+ and_grp_null_checks = " AND " + _grp_null_checks(glist)
+
+ # We keep a table of every vertex, the distance to that vertex from source
+ # and the parent in the path to the vertex.
+ # This table will be updated throughout the execution.
+ dist_col = "dist"
+ parent_col = "parent"
+ curr_dist_val = 0
+
+ # Creating the output table with the appropriate columns and data types
+ plpy.execute("""
+ CREATE TABLE {out_table} AS (
+ SELECT
+ {grp_comma}
+ {src} AS {vertex_id},
+ {curr_dist_val}::INT AS {dist_col},
+ {src} AS {parent_col}
+ FROM {edge_table}
+ LIMIT 0
+ ) {distribution}""".format(**locals()))
+
+ # We keep a summary table to keep track of the parameters used for this
+ # BFS run
+ plpy.execute( """
+ CREATE TABLE {out_table}_summary (
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ source_vertex INTEGER,
+ out_table TEXT,
+ max_distance INTEGER,
+ directed BOOLEAN,
+ grouping_cols TEXT
+ )""".format(**locals()))
+
+ plpy.execute("""
+ INSERT INTO {out_table}_summary VALUES
+ ('{vertex_table}', '{v_st}', '{edge_table}', '{e_st}',
+ {source_vertex}, '{out_table}', {d_st}, {dir_st}, '{g_st}')
+ """.format(**locals()))
+
+
+ # The queries for directed and undirected graphs share a common section.
+ # There are additional clauses added to the undirected graph queries.
+ # In the undirected case edges can be considered to go from {src} to
+ # {dest} and {dest} to {src}
+
+ insert_qry_undirected_init = ""
+ count_qry_undirected = ""
+ insert_qry_undirected_loop = ""
+
+ if not directed:
+ insert_qry_undirected_init = """ OR {dest} = {source_vertex}
+ """.format(**locals())
+
+ count_qry_undirected = """ OR (
+ ({grp_comma} {dest}) IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ WHERE {dist_col}={{curr_dist_val}}
+ )
+ AND
+ ({grp_comma} {src}) NOT IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ )
+ )
+ """.format(**locals())
+
+ insert_qry_undirected_loop = """ UNION
+ SELECT {grp_comma}
+ {src} AS {vertex_id},
+ {{curr_dist_val}}+1 AS {dist_col},
+ {dest} AS {parent_col}
+ FROM {edge_table}
+ WHERE (
+ ({grp_comma} {dest}) IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ WHERE {dist_col}={{curr_dist_val}}
+ )
+ AND
+ ({grp_comma} {src}) NOT IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ )
+ )
+ """.format(**locals())
+
+ # This step inserts into the output table the source vertex for each
+ # group in which it is present. Grouping behavior is not predictable
+ # when there are NULLs in any grouping column. Therefore those rows
+ # are explicitly removed from analysis
+ insert_qry_init = """
+ INSERT INTO {out_table}
+ SELECT {grp_comma}
+ {source_vertex} AS {vertex_id},
+ {curr_dist_val} AS {dist_col},
+ NULL AS {parent_col}
+ FROM {edge_table}
+ WHERE ({src} = {source_vertex} {insert_qry_undirected_init})
+ {and_grp_null_checks}
+ GROUP BY {grp_comma} {vertex_id}, {dist_col}
+ """.format(**locals())
+ plpy.execute(insert_qry_init.format(**locals()))
+
+ # After initialization of the output table, number of nodes connected
+ # by edges to the source vertex in each group is counted. This is also used
+ # below in the BFS iteration while-loop
+ count_qry = """
+ SELECT COUNT(*)
+ FROM {edge_table}
+ WHERE (
+ ({grp_comma} {src}) IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ WHERE {dist_col}={{curr_dist_val}}
+ )
+ AND
+ ({grp_comma} {dest}) NOT IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ )
+ ) {count_qry_undirected}
+ """.format(**locals())
+
+ vct = plpy.execute(count_qry.format(**locals()))[0]['count']
+
+ # This insert statement is executed within the BFS iteration while-loop
+ # below. It is used to discover and store all nodes (not already found)
+ # connected to those found in the immediate previous iteration.
+ insert_qry_loop = """
+ INSERT INTO {out_table}
+ SELECT {grp_comma} {vertex_id}, {dist_col}, min({parent_col})
+ FROM (
+ SELECT {grp_comma}
+ {dest} AS {vertex_id},
+ {{curr_dist_val}}+1 AS {dist_col},
+ {src} AS {parent_col}
+ FROM {edge_table}
+ WHERE (
+ ({grp_comma} {src}) IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ WHERE {dist_col}={{curr_dist_val}}
+ )
+ AND
+ ({grp_comma} {dest}) NOT IN (
+ SELECT {grp_comma} {vertex_id} FROM {out_table}
+ )
+ )
+ {insert_qry_undirected_loop}
+ ) t1
+ GROUP BY {grp_comma} {vertex_id}, {dist_col}
+ """.format(**locals())
+
+ # Main loop for traversing the graph
+ while vct > 0 and curr_dist_val < max_distance:
+ # The loop consists of two steps:
+ # 1) Disover and store all nodes that are linked to nodes found in
+ # the immediate previous iteration of the loop that have not already
+ # been found in all previous iterations
+ # 2) Check for any nodes linked to those discovered in Step 1 above
+ # that have not yet been discovered
+ #
+ # If a node has multiple possible parents then the parent with the
+ # smallest ID is chosen for output
+
+ # In the directed graph case only nodes in the {dest} column of
+ # the edge table are searched to find new nodes reachable from
+ # previously discovered nodes
+
+ # In the undirected graph case edges are treated as non-directional
+ # (or bidirectional). Nodes in both the {src} and {dest} columns of
+ # the edge table are searched to find new nodes reachable from
+ # previously discovered nodes.
+ #
+ # This approach does NOT require the user to provide a forward edge
+ # and a reverse edge between the same two nodes to indicate the
+ # graph's undirected nature. However, it will work in that scenario
+ # as well.
+
+ # Discover and store all nodes (not already found) connected to
+ # those found in the immediate previous iteration
+ plpy.execute(insert_qry_loop.format(**locals()))
+
+ # Update distance value for next iteration
+ curr_dist_val = curr_dist_val + 1
+
+ # Count / find any nodes that are connected to those discovered and
+ # stored in this iteration. This is used to check if the iterations
+ # need to continue.
+ vct = plpy.execute(count_qry.format(**locals()))[0]['count']
+
+ return None
+
+def graph_bfs_help(schema_madlib, message, **kwargs):
+ """
+ Help function for graph_bfs
+
+ Args:
+ @param schema_madlib
+ @param message: string, Help message string
+ @param kwargs
+
+ Returns:
+ String. Help/usage information
+ """
+ if not message:
+ help_string = """
+-----------------------------------------------------------------------
+ SUMMARY
+-----------------------------------------------------------------------
+
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex.
+
+For more details on function usage:
+ SELECT {schema_madlib}.graph_bfs('usage')
+ """
+ elif message.lower() in ['usage', 'help', '?']:
+ help_string = """
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex.
+
+{graph_usage}
+
+----------------------------------------------------------------------------
+ OUTPUT
+----------------------------------------------------------------------------
+The output of BFS ('out_table' above) contains a row for every vertex of that is
+reachable from the source_vertex. In the presence of grouping columns, only those
+edges are used for which there are no NULL values in any grouping column.
+The output table will have the following columns (in addition to the
+grouping columns):
+ - vertex_id : The id for any node reachable from source_vertex in addition to
+ the source_vertex. Will use the input parameter 'vertex_id' for
+ column naming.
+ - dist : The distance in number of edges (or hops) from the source_vertex
+ to where this vertex is located.
+ - parent : The parent of this vertex in BFS traversal of the graph from
+ source_vertex. Will use 'parent' for column naming. For the
+ case where vertex_id = source_vertex, the value for parent is NULL.
+"""
+ elif message.lower() in ("example", "examples"):
+ help_string = """
+----------------------------------------------------------------------------
+ EXAMPLES
+----------------------------------------------------------------------------
+-- Create a graph, represented as vertex and edge tables.
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+ id INTEGER
+ );
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8)
+;
+
+-- Traverse undirected graph from vertex 3:
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+ 'vertex', -- Vertex table
+ NULL, -- Vertix id column (NULL means use default naming)
+ 'edge', -- Edge table
+ NULL, -- Edge arguments (NULL means use default naming)
+ 3, -- Source vertex for BFS
+ 'out' -- Output table of nodes reachable from source_vertex
+ );
+ -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+
+SELECT * FROM out_summary;
+
+"""
+ else:
+ help_string = "No such option. Use {schema_madlib}.graph_bfs()"
+
+ return help_string.format(schema_madlib=schema_madlib,
+ graph_usage=get_graph_usage(schema_madlib, 'graph_bfs',
+ """source_vertex INT, -- The source vertex id for the algorithm to start.
+ out_table TEXT, -- Name of the table to store the result of BFS.
+ max_distance INT, -- Maximum distance from source_vertex to search through in the graph.
+ directed INT, -- If TRUE the graph will be treated as directed.
+ grouping_cols TEXT -- A comma-separated list of grouping columns."""))
+# ---------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/bfs.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.sql_in b/src/ports/postgres/modules/graph/bfs.sql_in
new file mode 100644
index 0000000..fd7a396
--- /dev/null
+++ b/src/ports/postgres/modules/graph/bfs.sql_in
@@ -0,0 +1,471 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *
+ * @file bfs.sql_in
+ *
+ * @brief SQL functions for graph analytics
+ * @date Jun 2017
+ *
+ * @sa Provides Breadth First Search graph algorithm.
+ *
+ *//* ----------------------------------------------------------------------- */
+m4_include(`SQLCommon.m4')
+/**
+@addtogroup grp_bfs
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#bfs">Breadth-first Search</a></li>
+<li><a href="#notes">Notes</a></li>
+<li><a href="#examples">Examples</a></li>
+<li><a href="#literature">Literature</a></li>
+</ul>
+</div>
+
+@brief Finds the nodes reachable from a given source vertex using a breadth-first approach.
+
+Given a graph and a source vertex, the Breadth-first Search (BFS) algorithm
+finds all nodes reachable from the source vertex by searching / traversing the graph
+in a breadth-first manner.
+
+@anchor bfs
+@par BFS
+<pre class="syntax">
+graph_bfs( vertex_table,
+ vertex_id,
+ edge_table,
+ edge_args,
+ source_vertex,
+ out_table,
+ max_distance,
+ directed,
+ grouping_cols
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>vertex_table</dt>
+<dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the
+column specified in the 'vertex_id' parameter below.</dd>
+
+<dt>vertex_id</dt>
+<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+vertex ids. The vertex ids are of type INTEGER with no duplicates.
+They do not need to be contiguous.</dd>
+
+<dt>edge_table</dt>
+<dd>TEXT. Name of the table containing the edge data. The edge table must contain
+columns for source vertex and destination vertex. Column naming convention is
+described below in the 'edge_args' parameter.
+In addition to vertex columns, if grouping is used then the columns specified
+in the 'grouping_cols' parameter must be present. </dd>
+
+<dt>edge_args</dt>
+<dd>TEXT. A comma-delimited string containing multiple named arguments of
+the form "name=value". The following parameters are supported for
+this string argument:
+ - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+ Default column name is 'src'.
+ (Not to be confused with the source_vertex argument passed to the BFS function)
+ - dest (INTEGER): Name of the column containing the destination vertex ids in
+ the edge table. Default column name is 'dest'.
+
+<dt>source_vertex</dt>
+<dd>INTEGER. The source vertex id for the algorithm to start. This vertex id must
+exist in the 'vertex_id' column of 'vertex_table'.</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the result of BFS.
+It contains a row for every vertex that is reachable from the source_vertex.
+In the presence of grouping columns, only those edges are used for which there are no NULL values
+in any grouping column.
+The output table will have the following columns (in addition to the grouping columns):
+ - vertex_id : The id for any node reachable from source_vertex in addition to
+ the source_vertex. Will use the input parameter 'vertex_id' for
+ column naming.
+ - dist : The distance in number of edges (or hops) from the source_vertex
+ to where this vertex is located.
+ - parent : The parent of this vertex in BFS traversal of the graph from source_vertex.
+ Will use 'parent' for column naming. For the
+ case where vertex_id = source_vertex, the value for parent is NULL.
+
+A summary table named <out_table>_summary is also created. This is an internal table that keeps a record of the input parameters.
+</dd>
+
+<dt>max_distance</dt>
+<dd>INT, default = NULL. Maximum distance (number of edges) from source_vertex to search through in the graph.</dd>
+
+<dt>directed</dt>
+<dd>BOOLEAN, default = FALSE. If TRUE the graph will be treated as directed, else it will be treated as an undirected graph.</dd>
+
+<dt>grouping_cols</dt>
+<dd>TEXT, default = NULL. A comma-separated list of columns used to group the
+input into discrete subgraphs.
+These columns must exist in the edge table. When this value is NULL, no grouping is used
+and a single BFS result is generated.
+@note Expressions are not currently supported for 'grouping_cols'.</dd>
+
+</dl>
+
+@anchor notes
+@par Notes
+
+The graph_bfs function is a SQL implementation of the well-known Breadth-first
+Search algorithm [1] modified appropriately for a relational database. It will
+find any node in the graph reachable from the source_vertex only once. If a node
+is reachable by many different paths from the source_vertex (i.e. has more than
+one parent), then only one of those parents is present in the output table.
+The BFS result will, in general, be different for different choices of source_vertex.
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+ id INTEGER
+ );
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+INSERT INTO edge VALUES
+(0, 5),
+(1, 0),
+(1, 3),
+(2, 6),
+(3, 4),
+(3, 5),
+(4, 2),
+(8, 9),
+(9, 10),
+(9, 11),
+(10, 8)
+;
+</pre>
+
+-# Traverse undirected graph from vertex 3:
+<pre class="syntax">
+DROP TABLE IF EXISTS out, out_summary;
+SELECT madlib.graph_bfs(
+ 'vertex', -- Vertex table
+ NULL, -- Vertix id column (NULL means use default naming)
+ 'edge', -- Edge table
+ NULL, -- Edge arguments (NULL means use default naming)
+ 3, -- Source vertex for BFS
+ 'out'); -- Output table of nodes reachable from source_vertex
+ -- Default values used for the other arguments
+SELECT * FROM out ORDER BY dist,id;
+</pre>
+<pre class="result">
+ id | dist | parent
+----+------+--------
+ 3 | 0 |
+ 1 | 1 | 3
+ 4 | 1 | 3
+ 5 | 1 | 3
+ 0 | 2 | 1
+ 2 | 2 | 4
+ 6 | 3 | 2
+(7 rows)
+</pre>
+<pre class="syntax">
+SELECT * FROM out_summary;
+</pre>
+<pre class="result">
+ vertex_table | vertex_id | edge_table | edge_args | source_vertex | out_table | max_distance | directed | grouping_cols
+--------------+-----------+------------+-----------+---------------+-----------+--------------+----------+---------------
+ vertex | NULL | edge | NULL | 3 | out | | | NULL
+(1 row)
+</pre>
+
+-# In this example, we use max_distance to limit the search distance.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_max, out_max_summary;
+SELECT madlib.graph_bfs(
+ 'vertex', -- Vertex table
+ NULL, -- Vertix id column (NULL means use default naming)
+ 'edge', -- Edge table
+ NULL, -- Edge arguments (NULL means use default naming)
+ 3, -- Source vertex for BFS
+ 'out_max', -- Output table of nodes reachable from source_vertex
+ 2); -- Maximum distance to traverse from source_vertex
+ -- Default values used for the other arguments
+SELECT * FROM out_max ORDER BY dist,id;
+</pre>
+<pre class="result">
+ id | dist | parent
+----+------+--------
+ 3 | 0 |
+ 1 | 1 | 3
+ 4 | 1 | 3
+ 5 | 1 | 3
+ 0 | 2 | 1
+ 2 | 2 | 4
+(6 rows)
+</pre>
+
+-# Now let's do an example using
+different column names in the tables (i.e., not the defaults).
+Create the vertex and edge tables:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex_alt, edge_alt;
+CREATE TABLE vertex_alt AS SELECT id AS v_id FROM vertex;
+CREATE TABLE edge_alt AS SELECT src AS n1, dest AS n2 FROM edge;
+</pre>
+
+-# Run BFS from vertex 8:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_alt, out_alt_summary;
+SELECT madlib.graph_bfs(
+ 'vertex_alt', -- Vertex table
+ 'v_id', -- Vertex id column (NULL means use default naming)
+ 'edge_alt', -- Edge table
+ 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)
+ 8, -- Source vertex for BFS
+ 'out_alt'); -- Output table of nodes reachable from source_vertex
+SELECT * FROM out_alt ORDER BY v_id;
+</pre>
+<pre class="result">
+ v_id | dist | parent
+------+------+--------
+ 8 | 0 |
+ 9 | 1 | 8
+ 10 | 1 | 8
+ 11 | 2 | 9
+</pre>
+
+-# Now we show an example where the graph is treated as a directed graph.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_alt_dir, out_alt_dir_summary;
+SELECT madlib.graph_bfs(
+ 'vertex_alt', -- Vertex table
+ 'v_id', -- Vertex id column (NULL means use default naming)
+ 'edge_alt', -- Edge table
+ 'src=n1, dest=n2', -- Edge arguments (NULL means use default naming)
+ 8, -- Source vertex for BFS
+ 'out_alt_dir', -- Output table of nodes reachable from source_vertex
+ NULL, -- Maximum distance to traverse from source_vertex
+ TRUE); -- Flag for specifying directed graph
+SELECT * FROM out_alt_dir ORDER BY v_id;
+</pre>
+<pre class="result">
+ v_id | dist | parent
+------+------+--------
+ 8 | 0 |
+ 9 | 1 | 8
+ 10 | 2 | 9
+ 11 | 2 | 9
+(4 rows)
+</pre>
+Notice that, with the graph being treated as directed, the parent of v_id=10
+is now vertex 9 and not 8 as in the undirected case.
+
+-# Create a graph with 2 groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr(
+ g1 INTEGER,
+ g2 TEXT,
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO edge_gr VALUES
+(100, 'a', 0, 5),
+(100, 'a', 1, 0),
+(100, 'a', 1, 3),
+(100, 'a', 2, 6),
+(100, 'a', 3, 4),
+(100, 'a', 3, 5),
+(100, 'a', 4, 2),
+(100, 'a', 8, 9),
+(100, 'a', 9, 10),
+(100, 'a', 9, 11),
+(100, 'a', 10, 8),
+(202, 'c', 8, 9),
+(202, 'c', 9, 10),
+(202, 'c', 9, 11),
+(202, 'c', 10, 8)
+;
+</pre>
+
+-# Run BFS for all groups from a given source_vertex.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+ 'vertex', -- Vertex table
+ NULL, -- Vertex id column (NULL means use default naming)
+ 'edge_gr', -- Edge table
+ NULL, -- Edge arguments (NULL means use default naming)
+ 8, -- Source vertex for BFS
+ 'out_gr', -- Output table of nodes reachable from source_vertex
+ NULL, -- Maximum distance to traverse from source_vertex
+ NULL, -- Flag for specifying directed graph
+ 'g1,g2' -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre>
+<pre class="result">
+ g1 | g2 | id | dist | parent
+-----+----+----+------+--------
+ 100 | a | 8 | 0 |
+ 100 | a | 9 | 1 | 8
+ 100 | a | 10 | 1 | 8
+ 100 | a | 11 | 2 | 9
+ 202 | c | 8 | 0 |
+ 202 | c | 9 | 1 | 8
+ 202 | c | 10 | 1 | 8
+ 202 | c | 11 | 2 | 9
+(8 rows)
+</pre>
+If source_vertex is not present in
+a group, then that group will not appear in the output table.
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_bfs(
+ 'vertex', -- Vertex table
+ NULL, -- Vertex id column (NULL means use default naming)
+ 'edge_gr', -- Edge table
+ NULL, -- Edge arguments (NULL means use default naming)
+ 3, -- Source vertex for BFS
+ 'out_gr', -- Output table of nodes reachable from source_vertex
+ NULL, -- Maximum distance to traverse from source_vertex
+ NULL, -- Flag for specifying directed graph
+ 'g1,g2' -- Grouping columns
+);
+SELECT * FROM out_gr ORDER BY g1,g2,dist,id;
+</pre>
+<pre class="result">
+ g1 | g2 | id | dist | parent
+-----+----+----+------+--------
+ 100 | a | 3 | 0 |
+ 100 | a | 1 | 1 | 3
+ 100 | a | 4 | 1 | 3
+ 100 | a | 5 | 1 | 3
+ 100 | a | 0 | 2 | 1
+ 100 | a | 2 | 2 | 4
+ 100 | a | 6 | 3 | 2
+(7 rows)
+</pre>
+
+@anchor literature
+@par Literature
+
+[1] Breadth-first Search algorithm. https://en.wikipedia.org/wiki/Breadth-first_search
+*/
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ source_vertex INT,
+ out_table TEXT,
+ max_distance INT,
+ directed BOOLEAN,
+ grouping_cols TEXT
+) RETURNS VOID AS $$
+ PythonFunction(graph, bfs, graph_bfs)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ source_vertex INT,
+ out_table TEXT,
+ max_distance INT,
+ directed BOOLEAN
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, $7, $8, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ source_vertex INT,
+ out_table TEXT,
+ max_distance INT
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, $7, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ source_vertex INT,
+ out_table TEXT
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.graph_bfs($1, $2, $3, $4, $5, $6, NULL, NULL, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+-------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs(
+ message VARCHAR
+) RETURNS VARCHAR AS $$
+ PythonFunction(graph, bfs, graph_bfs_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_bfs()
+RETURNS VARCHAR AS $$
+ SELECT MADLIB_SCHEMA.graph_bfs('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/graph_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in
index 77378d6..944c301 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -38,6 +38,17 @@ from utilities.validate_args import table_exists
from utilities.validate_args import columns_exist_in_table
from utilities.validate_args import table_is_empty
+def _grp_null_checks(grp_list):
+
+ """
+ Helper function for generating NULL checks for grouping columns
+ to be used within a WHERE clause
+ Args:
+ @param grp_list The list of grouping columns
+ """
+ return ' AND '.join([" {i} IS NOT NULL ".format(**locals())
+ for i in grp_list])
+
def _check_groups(tbl1, tbl2, grp_list):
"""
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/8c9b955c/src/ports/postgres/modules/graph/test/bfs.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in
new file mode 100644
index 0000000..223ab84
--- /dev/null
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -0,0 +1,278 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ----------------------------------------------------------------------- */
+
+
+DROP TABLE IF EXISTS vertex,edge,edge_grp;
+
+CREATE TABLE vertex(
+ id INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7),
+(8),
+(9),
+(10),
+(11)
+;
+
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER,
+ weight DOUBLE PRECISION
+ );
+INSERT INTO edge VALUES
+(0, 5, 1),
+(1, 0, 1),
+(1, 3, 1),
+(2, 6, 1),
+(3, 4, 1),
+(3, 5, 1),
+(4, 2, 1),
+(8, 9, 1),
+(9, 10, 1),
+(9, 11, 1),
+(10, 8, 1)
+;
+
+CREATE TABLE edge_grp(
+ g1 INTEGER,
+ g2 TEXT,
+ src INTEGER,
+ dest INTEGER,
+ weight DOUBLE PRECISION
+ );
+INSERT INTO edge_grp VALUES
+(100, 'a', 0, 5, 1),
+(100, 'a', 1, 0, 1),
+(100, 'a', 1, 3, 1),
+(100, 'a', 2, 6, 1),
+(100, 'a', 3, 4, 1),
+(100, 'a', 3, 5, 1),
+(100, 'a', 4, 2, 1),
+(100, 'a', 8, 9, 1),
+(100, 'a', 9, 10, 1),
+(100, 'a', 9, 11, 1),
+(100, 'a', 10, 8, 1),
+(100, 'b', 0, 5, 1),
+(100, 'b', 1, 0, 1),
+(100, 'b', 1, 3, 1),
+(100, 'b', 2, 6, 1),
+(100, 'b', 3, 4, 1),
+(100, 'b', 3, 5, 1),
+(100, 'b', 4, 2, 1),
+(100, 'b', 8, 9, 1),
+(100, 'b', 9, 10, 1),
+(100, 'b', 9, 11, 1),
+(100, 'b', 10, 8, 1),
+(202, 'c', 8, 9, 1),
+(202, 'c', 9, 10, 1),
+(202, 'c', 9, 11, 1),
+(202, 'c', 10, 8, 1),
+(NULL, 'b', 8, 9, 1),
+(NULL, 'b', 9, 10, 1),
+(NULL, 'b', 9, 11, 1),
+(NULL, 'b', 10, 8, 1)
+;
+;
+
+---------------------
+-- Undirected Graph
+---------------------
+-- Results to check against - uncomment if needed
+-- -- source_vertex = 3
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,3,0,NULL),
+-- (3,1,1,3),
+-- (3,4,1,3),
+-- (3,5,1,3),
+-- (3,0,2,1),
+-- (3,2,2,4),
+-- (3,6,3,2)
+-- ;
+-- -- source_vertex = 7
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+--
+-- -- source_vertex = 8
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (8,8,0,NULL),
+-- (8,9,1,8),
+-- (8,10,1,8),
+-- (8,11,2,9)
+-- ;
+
+-- Undirected graph
+-- source_vertex = 3
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,3,'out_frombfs');
+
+SELECT assert(
+ (array_agg(id ORDER BY id) = ARRAY[1,4,5]::INT[])
+ AND
+ (array_agg(parent ORDER BY id) = ARRAY[3,3,3]::INT[]),
+ 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 1 GROUP BY dist;
+
+SELECT assert(max(dist) = 3, 'Wrong output in graph (BFS)')
+ FROM out_frombfs;
+
+-- source_vertex = 7
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,7,'out_frombfs');
+
+SELECT assert(COUNT(*) = 0, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 8
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,8,'out_frombfs',NULL,FALSE,NULL);
+
+SELECT assert(parent = 8, 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 2 AND id = 10;
+
+-------------------
+-- Directed graph
+-------------------
+-- Results to check against - uncomment if needed
+
+-- -- source_vertex = 3
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,3,0,NULL),
+-- (3,4,1,3),
+-- (3,5,1,3),
+-- (3,2,2,4),
+-- (3,6,3,2)
+-- ;
+-- -- source_vertex = 7
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- -- source_vertex = 8
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- (8, 8, 0, NULL),
+-- (8, 9, 1, 8),
+-- (8, 10, 2, 9),
+-- (8, 11, 2, 9)
+-- ;
+
+-- source_vertex = 3
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex','id','edge','src,dest',3,'out_frombfs',NULL,TRUE,NULL);
+
+SELECT assert(
+ (array_agg(id ORDER BY id) = ARRAY[4,5]::INT[])
+ AND
+ (array_agg(parent ORDER BY id) = ARRAY[3,3]::INT[]),
+ 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 1 GROUP BY dist;
+SELECT assert(COUNT(*) = 5, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 7
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex','id','edge','src,dest',7,'out_frombfs',NULL,TRUE,NULL);
+
+SELECT assert(COUNT(*) = 0, 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- source_vertex = 8
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,8,'out_frombfs',NULL,TRUE);
+
+SELECT assert(COUNT(*) = 2, 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 2;
+
+-----------------------
+-- max_distance check
+-----------------------
+-- source_vertex = 3
+-- Undirected graph
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex','id','edge','src,dest',3,'out_frombfs',2,FALSE);
+
+SELECT assert(MAX(dist) = 2 AND COUNT(*) = 6,
+ 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+-- Directed graph
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge',NULL,3,'out_frombfs',2,TRUE);
+
+SELECT assert(MAX(dist) = 2 AND COUNT(*) = 4,
+ 'Wrong output in graph (BFS)') FROM out_frombfs;
+
+---------------------
+-- Grouping columns
+---------------------
+-- -- source_vertex = 3
+-- -- Undirected graph
+-- DROP TABLE IF EXISTS out_actual;
+-- CREATE TABLE out_actual (src INT, id INT, dist INT, parent INT);
+-- INSERT INTO out_actual VALUES
+-- (3,100,'a',3,0,NULL),
+-- (3,100,'a',1,1,3),
+-- (3,100,'a',4,1,3),
+-- (3,100,'a',5,1,3),
+-- (3,100,'a',0,2,1),
+-- (3,100,'a',2,2,4),
+-- (3,100,'a',6,3,2),
+-- (3,100,'b',3,0,NULL),
+-- (3,100,'b',1,1,3),
+-- (3,100,'b',4,1,3),
+-- (3,100,'b',5,1,3),
+-- (3,100,'b',0,2,1),
+-- (3,100,'b',2,2,4),
+-- (3,100,'b',6,3,2)
+-- ;
+
+DROP TABLE IF EXISTS out_frombfs, out_frombfs_summary;
+SELECT graph_bfs('vertex',NULL,'edge_grp',NULL,3,'out_frombfs',NULL,NULL,'g1,g2');
+
+SELECT assert( COUNT(*) = 14,'Wrong output in graph (BFS)') FROM out_frombfs;
+SELECT assert(COUNT(*) = 7,
+ 'Wrong output in graph (BFS)') FROM out_frombfs WHERE g2 = 'a'
+;
+SELECT assert(
+ array_agg(id ORDER BY id) = ARRAY[0,2]::INT[]
+ AND
+ array_agg(parent ORDER BY id) = ARRAY[1,4]::INT[],
+ 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 2 AND g1 = 100 AND g2 = 'a'
+;
+SELECT assert(
+ array_agg(id ORDER BY id) = ARRAY[0,2]::INT[]
+ AND
+ array_agg(parent ORDER BY id) = ARRAY[1,4]::INT[],
+ 'Wrong output in graph (BFS)')
+ FROM out_frombfs WHERE dist = 2 AND g1 = 100 AND g2 = 'b'
+;
+SELECT assert(MIN(g1) = 100 AND MAX(g1) = 100,
+ 'Wrong output in graph (BFS)') FROM out_frombfs GROUP BY g1,g2
+;