You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2020/02/26 19:29:11 UTC

[madlib] branch master updated: Graph: Fix ambiguous column name in various functions

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new 4a97693  Graph: Fix ambiguous column name in various functions
4a97693 is described below

commit 4a97693ce7514972f06f94c9e6265194703041f8
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Fri Feb 7 20:30:29 2020 -0500

    Graph: Fix ambiguous column name in various functions
    
    JIRA: MADLIB-1407
    
    Some of the graph functions had a bug that surfaced when the vertex id column
    had the same name as the edge src or dest columns. This commit fixes the issue
    on apsp, bfs, hits and pagerank.
    
    In addition, it adds tests for every graph function.
    
    Closes #477
---
 src/ports/postgres/modules/graph/apsp.py_in           |  6 +++---
 src/ports/postgres/modules/graph/bfs.py_in            |  6 +++---
 src/ports/postgres/modules/graph/graph_utils.py_in    |  2 +-
 src/ports/postgres/modules/graph/hits.py_in           |  6 +++---
 src/ports/postgres/modules/graph/measures.py_in       |  2 +-
 src/ports/postgres/modules/graph/pagerank.py_in       |  2 +-
 src/ports/postgres/modules/graph/test/apsp.sql_in     | 12 ++++++++++++
 src/ports/postgres/modules/graph/test/bfs.sql_in      | 13 +++++++++++++
 src/ports/postgres/modules/graph/test/hits.sql_in     | 13 +++++++++++++
 src/ports/postgres/modules/graph/test/measures.sql_in | 15 +++++++++++++++
 src/ports/postgres/modules/graph/test/pagerank.sql_in | 13 +++++++++++++
 src/ports/postgres/modules/graph/test/sssp.sql_in     | 13 +++++++++++++
 src/ports/postgres/modules/graph/test/wcc.sql_in      | 14 ++++++++++++++
 13 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/src/ports/postgres/modules/graph/apsp.py_in b/src/ports/postgres/modules/graph/apsp.py_in
index 4da2fd2..2d50929 100644
--- a/src/ports/postgres/modules/graph/apsp.py_in
+++ b/src/ports/postgres/modules/graph/apsp.py_in
@@ -220,10 +220,10 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, edge_table,
         # The source can be reached with 0 cost and next is itself.
         plpy.execute(
             """ UPDATE {out_table} SET
-            {weight} = 0, parent = {vertex_id}
+            {weight} = 0, parent = {vertex_table}.{vertex_id}
             FROM {vertex_table}
-            WHERE {out_table}.{src} = {vertex_id}
-            AND {out_table}.{dest} = {vertex_id}
+            WHERE {out_table}.{src} = {vertex_table}.{vertex_id}
+            AND {out_table}.{dest} = {vertex_table}.{vertex_id}
             """.format(**locals()))
 
         # Distance = 1: every edge means there is a path from src to dest
diff --git a/src/ports/postgres/modules/graph/bfs.py_in b/src/ports/postgres/modules/graph/bfs.py_in
index f0c07ac..6fc008e 100644
--- a/src/ports/postgres/modules/graph/bfs.py_in
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -286,7 +286,7 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table,
         toupdate = unique_string(desp='toupdate')
         insert_toupdate_table = """
             CREATE TEMP TABLE {toupdate} AS
-            SELECT {grp_sube_comma} {dest} AS {vertex_id}, {src} AS {parent_col}
+            SELECT {grp_sube_comma} {sube}.{dest} AS {vertex_id}, {sube}.{src} AS {parent_col}
             FROM (
                 SELECT {grp_comma} {src}, {dest}
                 FROM {edge_table}
@@ -303,8 +303,8 @@ def graph_bfs(schema_madlib, vertex_table, vertex_id, edge_table,
         if not directed:
             insert_toupdate_table += """
                 UNION ALL
-                SELECT {grp_sube1_comma} {src} AS {vertex_id},
-                        {dest} AS {parent_col}
+                SELECT {grp_sube1_comma} {sube1}.{src} AS {vertex_id},
+                        {sube1}.{dest} AS {parent_col}
                 FROM (
                     SELECT {grp_comma} {src}, {dest}
                     FROM {edge_table}
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in
index b0eaee4..c104f8e 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -101,7 +101,7 @@ def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
         **locals()))
 
     existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table))
-    _assert(vertex_id in existing_cols,
+    _assert(unquote_ident(vertex_id) in existing_cols,
             """Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """.
             format(**locals()))
     _assert(columns_exist_in_table(edge_table, edge_params.values()),
diff --git a/src/ports/postgres/modules/graph/hits.py_in b/src/ports/postgres/modules/graph/hits.py_in
index 23b6b1c..4864160 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -211,11 +211,11 @@ def hits(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
                     {authority_init_value}::DOUBLE PRECISION AS authority,
                     {hub_init_value}::DOUBLE PRECISION AS hub
             FROM (
-                SELECT {select_subquery1_grouping_cols_comma} {vertex_id}
+                SELECT {select_subquery1_grouping_cols_comma} {vertex_table}.{vertex_id}
                 FROM {edge_temp_table} JOIN {vertex_table}
                 ON {edge_temp_table}.{src}={vertex_table}.{vertex_id}
                 UNION
-                SELECT {select_subquery1_grouping_cols_comma} {vertex_id}
+                SELECT {select_subquery1_grouping_cols_comma} {vertex_table}.{vertex_id}
                 FROM {edge_temp_table} JOIN {vertex_table}
                 ON {edge_temp_table}.{dest}={vertex_table}.{vertex_id}
             ) {subquery1}
@@ -529,7 +529,7 @@ suffix '_summary' to the 'out_table' parameter.
 ----------------------------------------------------------------------------
                                 SUMMARY
 ----------------------------------------------------------------------------
-Given a directed graph, hits algorithm finds the authority and hub scores of 
+Given a directed graph, hits algorithm finds the authority and hub scores of
 all the vertices in the graph.
 --
 For an overview on usage, run:
diff --git a/src/ports/postgres/modules/graph/measures.py_in b/src/ports/postgres/modules/graph/measures.py_in
index d214948..a3dd778 100644
--- a/src/ports/postgres/modules/graph/measures.py_in
+++ b/src/ports/postgres/modules/graph/measures.py_in
@@ -91,7 +91,7 @@ class Graph(object):
                 "Graph: Edge table ({0}) is empty".format(self.edge_table))
 
         existing_cols = set(unquote_ident(i) for i in get_cols(self.vertex_table))
-        _assert(self.vertex_id_col in existing_cols,
+        _assert(unquote_ident(self.vertex_id_col) in existing_cols,
                 "Graph: The vertex column {0} is not present in "
                 "vertex table ({1})".format(self.vertex_id_col, self.vertex_table))
 
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index e39d216..a0f7319 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -455,7 +455,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                             WHERE __t1__.{src}=__t2__.{dest}
                             AND {where_group_clause}
                         ) {vpg_where_clause_ins}
-                        GROUP BY {select_group_cols}, {vertex_id}, pagerank
+                        GROUP BY {select_group_cols}, __t1__.{src}, pagerank
                     """.format(
                     select_group_cols=get_table_qualified_col_str(
                         '__t1__', grouping_cols_list),
diff --git a/src/ports/postgres/modules/graph/test/apsp.sql_in b/src/ports/postgres/modules/graph/test/apsp.sql_in
index 3f7af8a..c7bb163 100644
--- a/src/ports/postgres/modules/graph/test/apsp.sql_in
+++ b/src/ports/postgres/modules/graph/test/apsp.sql_in
@@ -97,3 +97,15 @@ SELECT assert(weight = 2, 'Wrong output in graph (APSP)')
 SELECT assert(parent = 2, 'Wrong output in graph (APSP)')
 	FROM out_gr WHERE src = 0 AND "DEST" = 5 AND grp = 1;
 
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO src;
+
+SELECT graph_apsp('vertex','src','edge_gr','dest="DEST"','out','grp');
+SELECT * FROM out ORDER BY src,"DEST";
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src TO "DEST";
+
+SELECT graph_apsp('vertex','"DEST"','edge_gr','dest="DEST"','out','grp');
+SELECT * FROM out ORDER BY src,"DEST";
diff --git a/src/ports/postgres/modules/graph/test/bfs.sql_in b/src/ports/postgres/modules/graph/test/bfs.sql_in
index d2e48c5..052d6b2 100644
--- a/src/ports/postgres/modules/graph/test/bfs.sql_in
+++ b/src/ports/postgres/modules/graph/test/bfs.sql_in
@@ -274,3 +274,16 @@ SELECT assert(
 SELECT assert(MIN(g1) = 100 AND MAX(g1) = 100,
 	'Wrong output in graph (BFS)') FROM out_frombfs GROUP BY g1,g2
 ;
+
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO "SRC";
+
+SELECT graph_bfs('vertex','"SRC"','edge_grp','src="SRC"',3,'out',NULL,NULL,'g1');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN "SRC" TO dest;
+
+SELECT graph_bfs('vertex','dest','edge_grp','src="SRC"',3,'out',NULL,NULL,'g1');
+SELECT * FROM out;
diff --git a/src/ports/postgres/modules/graph/test/hits.sql_in b/src/ports/postgres/modules/graph/test/hits.sql_in
index 0cbe962..32b56e2 100644
--- a/src/ports/postgres/modules/graph/test/hits.sql_in
+++ b/src/ports/postgres/modules/graph/test/hits.sql_in
@@ -156,3 +156,16 @@ SELECT assert(relative_error(hub , :expected_hub1) < :tolerance,
 SELECT assert(relative_error(hub , :expected_hub2) < :tolerance,
         'HITS: Incorrect value for hub score. Expected: ' || :expected_hub2 || ' Actual: ' || hub
     ) FROM hits_out WHERE id=5 and user_id = 2;
+
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO src;
+
+SELECT hits('vertex','src','edge',NULL,'out',3,0.01,'user_id');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src TO dest;
+
+SELECT hits('vertex','dest','edge',NULL,'out',3,0.01,'user_id');
+SELECT * FROM out;
diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in b/src/ports/postgres/modules/graph/test/measures.sql_in
index e12b7df..f6e87bd 100644
--- a/src/ports/postgres/modules/graph/test/measures.sql_in
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in
@@ -184,3 +184,18 @@ SELECT * FROM out_degrees ORDER BY grp, id;
 SELECT assert(COUNT(*)=1, 'Invalid value for node with only one outgoing edge, with grouping.')
 FROM out_degrees
 WHERE id=7 AND grp = 0;
+
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO src_id;
+
+SELECT graph_vertex_degrees('vertex','src_id','"EDGE"',
+    'src=src_id, dest="DEST_ID", weight=edge_weight','out');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src_id TO "DEST_ID";
+
+SELECT graph_vertex_degrees('vertex','"DEST_ID"','"EDGE"',
+    'src=src_id, dest="DEST_ID", weight=edge_weight','out');
+SELECT * FROM out;
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index e797812..ba45c30 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -192,3 +192,16 @@ NULL, -- Default Threshold
 SELECT assert(relative_error(SUM(pagerank), 1) < 0.00001,
         'PageRank: Scores do not sum up to 1 for group 1.'
     ) FROM pagerank_gr_out WHERE user_id=1;
+
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO src;
+
+SELECT pagerank('vertex','src','"EDGE"',NULL,'out',NULL,NULL,NULL,'user_id');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src TO dest;
+
+SELECT pagerank('vertex','dest','"EDGE"',NULL,'out',NULL,NULL,NULL,'user_id');
+SELECT * FROM out;
diff --git a/src/ports/postgres/modules/graph/test/sssp.sql_in b/src/ports/postgres/modules/graph/test/sssp.sql_in
index e0c3ef8..eab2b38 100644
--- a/src/ports/postgres/modules/graph/test/sssp.sql_in
+++ b/src/ports/postgres/modules/graph/test/sssp.sql_in
@@ -140,3 +140,16 @@ SELECT * FROM "out_Q_summary";
 SELECT graph_sssp_get_path('"out_Q"',5,'"out_Q_path"');
 
 SELECT * FROM "out_Q_path";
+
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN id TO src;
+
+SELECT graph_sssp('vertex','src','edge_gr',NULL,0,'out','grp');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src TO dest;
+
+SELECT graph_sssp('vertex','dest','edge_gr',NULL,0,'out','grp');
+SELECT * FROM out;
diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in
index 2325325..1b9dcaa 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in
@@ -158,3 +158,17 @@ SELECT assert(relative_error(num_components, 3) < 0.00001,
         'Weakly Connected Components: Incorrect largest component value.'
     ) FROM count_table WHERE user_id=1;
 
+-- Test for common column names in vertex and edge tables
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN vertex_id TO src;
+
+SELECT weakly_connected_components('vertex','src','"EDGE"',
+    'src=src_node,dest=dest_node','out','user_id');
+SELECT * FROM out;
+
+DROP TABLE IF EXISTS out, out_summary;
+ALTER TABLE vertex RENAME COLUMN src TO dest;
+
+SELECT weakly_connected_components('vertex','dest','"EDGE"',
+    'src=src_node,dest=dest_node','out','user_id');
+SELECT * FROM out;