You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2022/07/07 08:03:42 UTC

[madlib] branch master updated: Graph: Add multi column support for Pagerank and WCC

This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/madlib.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a24663c Graph: Add multi column support for Pagerank and WCC
8a24663c is described below

commit 8a24663ccf2ee76ef538c416ed505485b740ff04
Author: Orhan Kislal <ok...@apache.org>
AuthorDate: Mon Jun 13 20:03:05 2022 -0400

    Graph: Add multi column support for Pagerank and WCC
    
    JIRA: MADLIB-1502, MADLIB-1503
    
    This commit adds support for identifying vertices with multiple columns
    for Pagerank and WCC modules. The rest of the graph modules will add the
    same functionality in later commits.
    
    The multi column support is done by creating views in which we collect
    the given BIGINT columns into a single BIGINT array.
    
    Co-authored-by: Bhuvnesh Chaudhary <bc...@pivotal.io>
---
 doc/example/madlib_pagerank_example.sql            | 172 +++++++++++++
 doc/example/madlib_wcc_example.sql                 | 170 +++++++++++++
 .../postgres/modules/dbscan/test/dbscan.sql_in     |   4 +-
 src/ports/postgres/modules/graph/graph_utils.py_in |  21 +-
 src/ports/postgres/modules/graph/pagerank.py_in    | 267 +++++++++++++-------
 src/ports/postgres/modules/graph/pagerank.sql_in   | 270 ++++++++++++++-------
 .../postgres/modules/graph/test/pagerank.sql_in    |  98 +++++++-
 src/ports/postgres/modules/graph/test/wcc.sql_in   | 101 +++++++-
 src/ports/postgres/modules/graph/wcc.py_in         | 190 +++++++++++++--
 src/ports/postgres/modules/graph/wcc.sql_in        | 205 +++++++++++-----
 .../postgres/modules/utilities/validate_args.py_in |   4 +
 11 files changed, 1226 insertions(+), 276 deletions(-)

diff --git a/doc/example/madlib_pagerank_example.sql b/doc/example/madlib_pagerank_example.sql
new file mode 100644
index 00000000..a1369c46
--- /dev/null
+++ b/doc/example/madlib_pagerank_example.sql
@@ -0,0 +1,172 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ----------------------------------------------------------------------- */
+
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+    node_id INTEGER
+    );
+CREATE TABLE edge(
+    conn_src INTEGER,
+    conn_dest INTEGER,
+    user_id INTEGER
+);
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6);
+INSERT INTO edge VALUES
+(0, 1, 1),
+(0, 2, 1),
+(0, 4, 1),
+(1, 2, 1),
+(1, 3, 1),
+(2, 3, 1),
+(2, 5, 1),
+(2, 6, 1),
+(3, 0, 1),
+(4, 0, 1),
+(5, 6, 1),
+(6, 3, 1),
+(0, 1, 2),
+(0, 2, 2),
+(0, 4, 2),
+(1, 2, 2),
+(1, 3, 2),
+(2, 3, 2),
+(3, 0, 2),
+(4, 0, 2),
+(5, 6, 2),
+(6, 3, 2);
+
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out');                 -- Output table of PageRank
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+
+SELECT * FROM pagerank_out_summary;
+
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                       0.5);                            -- Damping factor
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                       NULL,                            -- Default damping factor (0.85)
+                       NULL,                            -- Default max iters (100)
+                       0.00000001,                      -- Threshold
+                       'user_id');                      -- Grouping column name
+SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;
+
+SELECT * FROM pagerank_out_summary ORDER BY user_id;
+
+DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
+SELECT madlib.pagerank(
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                        NULL,                           -- Default damping factor (0.85)
+                        NULL,                           -- Default max iters (100)
+                        NULL,                           -- Default Threshold
+                        NULL,                           -- No Grouping
+                       '{2,4}');                        -- Personalization vertices
+SELECT * FROM pagerank_out ORDER BY pagerank DESC;
+SELECT * FROM pagerank_out_summary;
+
+DROP TABLE IF EXISTS vertex_multicol_pagerank, edge_multicol_pagerank;
+CREATE TABLE vertex_multicol_pagerank(
+    node_id_major BIGINT,
+    node_id_minor BIGINT
+);
+CREATE TABLE edge_multicol_pagerank(
+    conn_src_major BIGINT,
+    conn_dest_major BIGINT,
+    user_id_major BIGINT,
+    conn_src_minor BIGINT,
+    conn_dest_minor BIGINT,
+    user_id_minor BIGINT
+);
+INSERT INTO vertex_multicol_pagerank VALUES
+(0, 0),
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 4),
+(5, 5),
+(6, 6);
+INSERT INTO edge_multicol_pagerank VALUES
+(0, 1, 1, 0, 1, 1),
+(0, 2, 1, 0, 2, 1),
+(0, 4, 1, 0, 4, 1),
+(1, 2, 1, 1, 2, 1),
+(1, 3, 1, 1, 3, 1),
+(2, 3, 1, 2, 3, 1),
+(2, 5, 1, 2, 5, 1),
+(2, 6, 1, 2, 6, 1),
+(3, 0, 1, 3, 0, 1),
+(4, 0, 1, 4, 0, 1),
+(5, 6, 1, 5, 6, 1),
+(6, 3, 1, 6, 3, 1),
+(0, 1, 2, 0, 1, 2),
+(0, 2, 2, 0, 2, 2),
+(0, 4, 2, 0, 4, 2),
+(1, 2, 2, 1, 2, 2),
+(1, 3, 2, 1, 3, 2),
+(2, 3, 2, 2, 3, 2),
+(3, 0, 2, 3, 0, 2),
+(4, 0, 2, 4, 0, 2),
+(5, 6, 2, 5, 6, 2),
+(6, 3, 2, 6, 3, 2);
+
+DROP TABLE IF EXISTS pagerank_multicol_out, pagerank_multicol_out_summary;
+SELECT madlib.pagerank(
+                       'vertex_multicol_pagerank',                                                      -- Vertex table
+                       '[node_id_major,node_id_minor]',                                                 -- Vertex id columns (bracketed list = composite id)
+                       'edge_multicol_pagerank',                                                        -- Edge table
+                       'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]',   -- Comma delimited string of edge arguments
+                       'pagerank_multicol_out',                                                         -- Output table of PageRank
+                        NULL,                                                                           -- Default damping factor (0.85)
+                        NULL,                                                                           -- Default max iters (100)
+                        NULL,                                                                           -- Default Threshold
+                       'user_id_major,user_id_minor',                                                   -- Grouping columns
+                       '{{2,2},{4,4}}');                                                                -- Personalization vertices (one inner array per composite id)
+SELECT * FROM pagerank_multicol_out ORDER BY user_id_major, user_id_minor, pagerank DESC;
+SELECT * FROM pagerank_multicol_out_summary ORDER BY user_id_major, user_id_minor;
diff --git a/doc/example/madlib_wcc_example.sql b/doc/example/madlib_wcc_example.sql
new file mode 100644
index 00000000..9aadb0a9
--- /dev/null
+++ b/doc/example/madlib_wcc_example.sql
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ----------------------------------------------------------------------- */
+
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+    node_id INTEGER
+);
+CREATE TABLE edge(
+    conn_src INTEGER,
+    conn_dest INTEGER,
+    user_id INTEGER
+);
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(10),
+(11),
+(12),
+(13),
+(14),
+(15),
+(16);
+INSERT INTO edge VALUES
+(0, 1, 1),
+(0, 2, 1),
+(1, 2, 1),
+(1, 3, 1),
+(2, 3, 1),
+(2, 5, 1),
+(2, 6, 1),
+(3, 0, 1),
+(5, 6, 1),
+(6, 3, 1),
+(10, 11, 2),
+(10, 12, 2),
+(11, 12, 2),
+(11, 13, 2),
+(12, 13, 2),
+(13, 10, 2),
+(15, 16, 2),
+(15, 14, 2);
+
+DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
+SELECT madlib.weakly_connected_components(
+    'vertex',                        -- Vertex table
+    'node_id',                       -- Vertex id column
+    'edge',                          -- Edge table
+    'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+    'wcc_out');                      -- Output table of weakly connected components
+SELECT * FROM wcc_out ORDER BY component_id, id;
+
+DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
+SELECT madlib.weakly_connected_components(
+    'vertex',                       -- Vertex table
+    'node_id',                      -- Vertex id column
+    'edge',                         -- Edge table
+    'src=conn_src, dest=conn_dest', -- Comma delimited string of edge arguments
+    'wcc_out',                      -- Output table of weakly connected components
+    'user_id');                     -- Grouping column name
+SELECT * FROM wcc_out ORDER BY user_id, component_id, id;
+
+DROP TABLE IF EXISTS largest_cpt_table;
+SELECT madlib.graph_wcc_largest_cpt(
+                         'wcc_out',             -- WCC output table
+                         'largest_cpt_table');  -- output table containing largest component ID
+SELECT * FROM largest_cpt_table ORDER BY component_id;
+
+DROP TABLE IF EXISTS histogram_table;
+SELECT madlib.graph_wcc_histogram(
+                         'wcc_out',           -- WCC output table
+                         'histogram_table');  -- output table containing the histogram of vertices
+SELECT * FROM histogram_table ORDER BY component_id;
+
+DROP TABLE IF EXISTS vc_table;
+SELECT madlib.graph_wcc_vertex_check(
+                         'wcc_out',    -- WCC output table
+                         '14,15',      -- Pair of vertex IDs
+                         'vc_table');  -- output table containing components that contain the two vertices
+SELECT * FROM vc_table ORDER BY component_id;
+
+DROP TABLE IF EXISTS reach_table;
+SELECT madlib.graph_wcc_reachable_vertices(
+                         'wcc_out',         -- WCC output table
+                         '0',               -- source vertex
+                         'reach_table');    -- output table containing all vertices reachable from source vertex
+SELECT * FROM reach_table ORDER BY component_id, dest;
+
+DROP TABLE IF EXISTS count_table;
+SELECT madlib.graph_wcc_num_cpts(
+                         'wcc_out',       -- WCC output table
+                         'count_table');  -- output table containing number of components per group
+SELECT * FROM count_table;
+
+DROP TABLE IF EXISTS vertex_multicol_wcc, edge_multicol_wcc;
+CREATE TABLE vertex_multicol_wcc(
+    node_id_major BIGINT,
+    node_id_minor BIGINT
+);
+CREATE TABLE edge_multicol_wcc(
+    conn_src_major BIGINT,
+    conn_dest_major BIGINT,
+    user_id_major BIGINT,
+    conn_src_minor BIGINT,
+    conn_dest_minor BIGINT,
+    user_id_minor BIGINT
+);
+INSERT INTO vertex_multicol_wcc VALUES
+(0, 0),
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 4),
+(5, 5),
+(6, 6);
+INSERT INTO edge_multicol_wcc VALUES
+(0, 1, 1, 0, 1, 1),
+(0, 2, 1, 0, 2, 1),
+(0, 4, 1, 0, 4, 1),
+(1, 2, 1, 1, 2, 1),
+(1, 3, 1, 1, 3, 1),
+(2, 3, 1, 2, 3, 1),
+(2, 5, 1, 2, 5, 1),
+(2, 6, 1, 2, 6, 1),
+(3, 0, 1, 3, 0, 1),
+(4, 0, 1, 4, 0, 1),
+(5, 6, 1, 5, 6, 1),
+(6, 3, 1, 6, 3, 1),
+(0, 1, 2, 0, 1, 2),
+(0, 2, 2, 0, 2, 2),
+(0, 4, 2, 0, 4, 2),
+(1, 2, 2, 1, 2, 2),
+(1, 3, 2, 1, 3, 2),
+(2, 3, 2, 2, 3, 2),
+(3, 0, 2, 3, 0, 2),
+(4, 0, 2, 4, 0, 2),
+(5, 6, 2, 5, 6, 2),
+(6, 3, 2, 6, 3, 2);
+
+DROP TABLE IF EXISTS wcc_multicol_out, wcc_multicol_out_summary;
+SELECT madlib.weakly_connected_components(
+    'vertex_multicol_wcc',                                                          -- Vertex table
+    '[node_id_major,node_id_minor]',                                                -- Vertex id columns
+    'edge_multicol_wcc',                                                            -- Edge table
+    'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]',  -- Comma delimited string of edge arguments
+    'wcc_multicol_out',                                                             -- Output table of weakly connected components
+    'user_id_major,user_id_minor');                                                 -- Grouping column names
+SELECT * FROM wcc_multicol_out ORDER BY user_id_major, user_id_minor, component_id, id;
diff --git a/src/ports/postgres/modules/dbscan/test/dbscan.sql_in b/src/ports/postgres/modules/dbscan/test/dbscan.sql_in
index 5756b99d..895b675b 100644
--- a/src/ports/postgres/modules/dbscan/test/dbscan.sql_in
+++ b/src/ports/postgres/modules/dbscan/test/dbscan.sql_in
@@ -43,9 +43,9 @@ copy dbscan_train_data (id_in, data) FROM stdin delimiter '|';
 DROP TABLE IF EXISTS out1, out1_summary, out1_predict;
 SELECT dbscan('dbscan_train_data','out1','id_in','data',20,4,'squared_dist_norm2','brute');
 
-SELECT assert(count(DISTINCT id) = 5, 'Incorrect cluster 0') FROM out1 WHERE cluster_id = 0 and id=ANY(ARRAY[1,2,3,4,5]);
+SELECT assert(count(DISTINCT id) = 5, 'Incorrect cluster 0') FROM out1 WHERE id <= 5;
 
-SELECT assert(count(DISTINCT id) = 4, 'Incorrect cluster 1') FROM out1 WHERE cluster_id = 1 and id=ANY(ARRAY[6,7,8,9]);
+SELECT assert(count(DISTINCT id) = 4, 'Incorrect cluster 1') FROM out1 WHERE id >= 6;
 
 SELECT assert(id_column = 'id_in', 'id_column field in summary table should have been ''id_in''') FROM out1_summary;
 
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in
index 889ef88c..bd89d123 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -73,7 +73,6 @@ def validate_output_and_summary_tables(model_out_table, module_name,
         _assert(not table_exists(out_table),
                 "Graph WCC: Output table {0} already exists.".format(out_table))
 
-
 def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
                           out_table, func_name, **kwargs):
     """
@@ -102,15 +101,19 @@ def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
             "Graph {func_name}: Edge table ({edge_table}) is empty!".format(
         **locals()))
 
-    existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table))
-    _assert(unquote_ident(vertex_id) in existing_cols,
-            """Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """.
-            format(**locals()))
-    _assert(columns_exist_in_table(edge_table, edge_params.values()),
-            """Graph {func_name}: Not all columns from {cols} are present in edge table ({edge_table})""".
-            format(cols=edge_params.values(), **locals()))
+    _assert(columns_exist_in_table(vertex_table, vertex_id),
+        """Graph {func_name}: Not all columns from \"{vertex_id}\" are present in vertex table ({vertex_table})""".
+        format(**locals()))
+
+    src = edge_params["src"]
+    dest = edge_params["dest"]
 
-    return None
+    _assert(columns_exist_in_table(edge_table, src),
+        """Graph {func_name}: Not all columns from \"{src}\" are present in edge table ({edge_table})""".
+        format(**locals()))
+    _assert(columns_exist_in_table(edge_table, dest),
+        """Graph {func_name}: Not all columns from \"{dest}\" are present in edge table ({edge_table})""".
+        format(**locals()))
 
 def validate_params_for_link_analysis(schema_madlib, func_name,
                                             threshold, max_iter,
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
index 830432b5..3a6b7b3b 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -49,12 +49,13 @@ from utilities.utilities import py_list_to_sql_string
 
 from utilities.validate_args import columns_exist_in_table, get_cols_and_types
 from utilities.validate_args import table_exists
+from utilities.validate_args import unquote_ident
 from utilities.utilities import rename_table
 
 
 def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
                            edge_params, out_table, damping_factor, max_iter,
-                           threshold, grouping_cols_list, personalization_vertices):
+                           threshold, grouping_cols_list):
     """
     Function to validate input parameters for PageRank
     """
@@ -68,42 +69,40 @@ def validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
             "PageRank: Invalid damping factor value ({0}), must be between 0 and 1.".
             format(damping_factor))
 
-
+def validate_personaliztion_vertices(schema_madlib, vertex_table, vertex_id, vertex_type, edge_table,
+                                     src, dest, grouping_cols_list, personalization_vertices,
+                                     personalization_vertices_str, personalization_vertices_join):
     # Validate against the given set of nodes for Personalized Page Rank
-    if personalization_vertices:
-        grouping_cols = get_table_qualified_col_str(
-            edge_table, grouping_cols_list)
-        group_by_clause = "GROUP BY {0}".format(grouping_cols) \
-            if grouping_cols_list else ''
-        src = edge_params["src"]
-        dest = edge_params["dest"]
-        input_personalization_vertices_length = len(personalization_vertices)
-
-        personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
-
-        # Get a list which has the number of personalization nodes of each group
-        vertices_count_list_by_group = plpy.execute("""
-                SELECT count(distinct {vertex_id}) AS count
-                FROM {vertex_table}
-                RIGHT JOIN {edge_table}
-                ON ({vertex_table}.{vertex_id} = {edge_table}.{src}
-                OR {vertex_table}.{vertex_id} = {edge_table}.{dest})
-                AND {vertex_table}.{vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
-                {group_by_clause}
-            """.format(**locals()))
-
-
-        # The number of personalization nodes for every group should be equal to
-        # the number given by input personalization_vertices list. Otherwise,
-        # some nodes are missing for certain group. Or there might be duplicate
-        # nodes in input personalization_vertices list. Or there are some
-        # invalid nodes in input list that don't exist in vertex table. In any
-        # case, throw an error.
-        for key in vertices_count_list_by_group:
-            if key["count"] != input_personalization_vertices_length:
-                plpy.error("Personalization nodes must be a subset "
-                           "of the vertex_table without duplicates and "
-                           "every nodes should be present in all the groups")
+
+    grouping_cols = get_table_qualified_col_str(
+        edge_table, grouping_cols_list)
+    group_by_clause = "GROUP BY {0}".format(grouping_cols) \
+        if grouping_cols_list else ''
+    input_personalization_vertices_length = len(personalization_vertices)
+
+    # Get a list which has the number of personalization nodes of each group
+    sql = """
+            SELECT count(distinct {vertex_id}) AS count
+            FROM {vertex_table}
+            RIGHT JOIN {edge_table}
+            ON ({vertex_table}.{vertex_id} = {edge_table}.{src}
+            OR {vertex_table}.{vertex_id} = {edge_table}.{dest})
+            RIGHT JOIN {personalization_vertices_join}
+            ON {vertex_table}.{vertex_id}::{vertex_type} = unnest_result
+            {group_by_clause}
+        """.format(**locals())
+    vertices_count_list_by_group = plpy.execute(sql)
+    # The number of personalization nodes for every group should be equal to
+    # the number given by input personalization_vertices list. Otherwise,
+    # some nodes are missing for certain group. Or there might be duplicate
+    # nodes in input personalization_vertices list. Or there are some
+    # invalid nodes in input list that don't exist in vertex table. In any
+    # case, throw an error.
+    for key in vertices_count_list_by_group:
+        if key["count"] != input_personalization_vertices_length:
+            plpy.error("Personalization nodes must be a subset "
+                       "of the vertex_table without duplicates and "
+                       "every nodes should be present in all the groups")
 
 
 def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_table,
@@ -128,10 +127,6 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
     with OptimizerControl(False):
 
         with MinWarning('warning'):
-            params_types = {'src': str, 'dest': str}
-            default_args = {'src': 'src', 'dest': 'dest'}
-            edge_params = extract_keyvalue_params(
-                edge_args, params_types, default_args)
 
             # populate default values for optional params if null
             if damping_factor is None:
@@ -142,19 +137,96 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 vertex_id = "id"
             if not grouping_cols:
                 grouping_cols = ''
+                grouping_sql = ''
+            else:
+                grouping_sql = ', {0}'.format(grouping_cols)
+
+            # vertex_id, src and dest are converted to list type
+            params_types = {'src': list, 'dest': list}
+            default_args = {'src': ['src'], 'dest': ['dest']}
+            edge_params = extract_keyvalue_params(
+                edge_args, params_types, default_args)
+
+            if vertex_id[0] == '[' and vertex_id[-1] == ']':
+                vertex_id = split_quoted_delimited_str(vertex_id[1:-1])
+                vertex_sql = "ARRAY[{0}]::BIGINT[]".format(','.join(vertex_id))
+                vertex_type = "BIGINT[]"
+            else:
+                vertex_sql = vertex_id
+                vertex_id = [vertex_id]
+                vertex_type = "BIGINT"
+
+            src_list = edge_params["src"]
+            if len(src_list) > 1:
+                src = "ARRAY[{0}]::BIGINT[]".format(','.join(edge_params["src"]))
+            else:
+                edge_params["src"] = edge_params["src"][0]
+                src = edge_params["src"]
+
+            dest_list = edge_params["dest"]
+            if len(dest_list) > 1:
+                dest = "ARRAY[{0}]::BIGINT[]".format(','.join(edge_params["dest"]))
+            else:
+                edge_params["dest"] = edge_params["dest"][0]
+                dest = edge_params["dest"]
 
             grouping_cols_list = split_quoted_delimited_str(grouping_cols)
             validate_pagerank_args(schema_madlib, vertex_table, vertex_id, edge_table,
                                    edge_params, out_table, damping_factor,
-                                   max_iter, threshold, grouping_cols_list,
-                                   personalization_vertices)
+                                   max_iter, threshold, grouping_cols_list)
+
+            vertex_view = unique_string('vertex_view')
+            edge_view = unique_string('edge_view')
+
+            sql = """
+                CREATE VIEW {vertex_view} AS
+                SELECT {vertex_sql} AS id
+                FROM {vertex_table}
+                """.format(**locals())
+            plpy.execute(sql)
+            sql = """
+                CREATE VIEW {edge_view} AS
+                SELECT {src} AS src, {dest} AS dest {grouping_sql}
+                FROM {edge_table}
+                """.format(**locals())
+            plpy.execute(sql)
+
+            vertex_table = vertex_view
+            edge_table = edge_view
+            vertex_id = 'id'
+            src = 'src'
+            dest = 'dest'
+
+            personalization_vertices_join = ''
+            pers_sql = ''
+            psubq = unique_string(desp='psubquery')
+            if personalization_vertices:
+                if isinstance(personalization_vertices[0], list):
+                    personalization_vertices_list = []
+                    for idx, i in enumerate(personalization_vertices):
+                        personalization_vertices_list.append('[' + ', '.join([str(j) for j in i]) + ']')
+
+                    personalization_vertices_str = ','.join(personalization_vertices_list)
+                    personalization_vertices_join = """
+                        (SELECT ({schema_madlib}.array_unnest_2d_to_1d(ARRAY[{personalization_vertices_str}])).unnest_result::{vertex_type}
+                        ) {psubq}
+                        """.format(**locals())
+                else:
+                    personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
+                    personalization_vertices_join = """
+                        (SELECT unnest(ARRAY[{personalization_vertices_str}])::{vertex_type} AS unnest_result) {psubq}
+                        """.format(**locals())
+                validate_personaliztion_vertices(schema_madlib, vertex_table,
+                                                 vertex_id, vertex_type, edge_table,
+                                                 src, dest, grouping_cols_list,
+                                                 personalization_vertices,
+                                                 personalization_vertices_str,
+                                                 personalization_vertices_join)
 
             summary_table = add_postfix(out_table, "_summary")
             _assert(not table_exists(summary_table),
                     "Graph PageRank: Output summary table ({summary_table}) already exists."
                     .format(**locals()))
-            src = edge_params["src"]
-            dest = edge_params["dest"]
             n_vertices = plpy.execute("""
                         SELECT COUNT({0}) AS cnt
                         FROM {1}
@@ -170,7 +242,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
             grouping_where_clause = ''
             group_by_clause = ''
             random_prob = ''
-            ppr_join_clause = ''
+            ppr_join = ''
 
             edge_temp_table = unique_string(desp='temp_edge')
             grouping_cols_comma = grouping_cols + ',' if grouping_cols else ''
@@ -212,16 +284,13 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 """.format(**locals())
 
             # Get query params for Personalized Page Rank.
-            ppr_params = ''
             total_ppr_nodes = 0
             random_jump_prob_ppr = 0
             ppr_init_value_clause = ''
             if personalization_vertices:
-                ppr_params = get_query_params_for_ppr(personalization_vertices, damping_factor,
-                                                      vertex_id, edge_temp_table, vertex_table, edge_params)
-                total_ppr_nodes = ppr_params[0]
-                random_jump_prob_ppr = ppr_params[1]
-                ppr_init_value_clause = ppr_params[2]
+                (total_ppr_nodes, random_jump_prob_ppr, ppr_init_value_clause) = \
+                    get_query_params_for_ppr(personalization_vertices, damping_factor,
+                                             vertex_id, edge_temp_table, vertex_table, edge_params)
 
             random_probability = (1.0 - damping_factor) / n_vertices
             if total_ppr_nodes > 0:
@@ -278,17 +347,16 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                     distinct_grp_table, grouping_cols_list)
                 # Find number of vertices in each group, this is the normalizing
                 # factor for computing the random_prob
-                where_clause_ppr = ''
                 if personalization_vertices:
-                    personalization_vertices_str = ','.join(
-                        [str(i) for i in personalization_vertices])
-                    where_clause_ppr = """
-                        where __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
-                    """.format(**locals())
                     random_prob_grp = 1.0 - damping_factor
                     init_prob_grp = 1.0 / total_ppr_nodes
+
+                    ppr_join = """
+                        INNER JOIN {personalization_vertices_join}
+                            ON {subq}.__vertices__ = {psubq}.unnest_result
+                        """.format(**locals())
                 else:
-                    personalization_vertices_str = ''
+                    ppr_join = ''
                     random_prob_grp = """
                             {rand_damp}/COUNT(__vertices__)::DOUBLE PRECISION
                         """.format(**locals())
@@ -301,7 +369,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 plpy.execute("""CREATE TEMP TABLE {vertices_per_group} AS
                         SELECT {distinct_grp_table}.*,
                         {init_prob_grp} AS {init_pr},
-                        {random_prob_grp} as {random_prob}
+                        {random_prob_grp} AS {random_prob}
                         FROM {distinct_grp_table} INNER JOIN (
                             SELECT {grouping_cols}, {src} AS __vertices__
                             FROM {edge_temp_table}
@@ -309,7 +377,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                             SELECT {grouping_cols}, {dest} FROM {edge_temp_table}
                         ){subq}
                         ON {grouping_where_clause}
-                        {where_clause_ppr}
+                        {ppr_join}
                         GROUP BY {group_by_clause}
                     """.format(**locals()))
 
@@ -321,9 +389,14 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 if personalization_vertices:
                     init_prob_grp_ppr = 1.0 / total_ppr_nodes
                     init_pr = """
-                            CASE when __vertices__ = ANY(ARRAY[{personalization_vertices_str}])
+                            CASE WHEN unnest_result IS NOT NULL
                             THEN {init_prob_grp_ppr} ELSE 0 END
                         """.format(**locals())
+                    pers_sql = """
+                            LEFT JOIN
+                            {personalization_vertices_join}
+                            ON {subq}.__vertices__ = {psubq}.unnest_result
+                        """.format(**locals())
 
                 plpy.execute("""
                         CREATE TEMP TABLE {cur} AS
@@ -336,6 +409,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                             SELECT {grouping_cols}, {dest} FROM {edge_temp_table}
                         ){subq}
                         ON {grouping_where_clause}
+                        {pers_sql}
                         {cur_distribution}
                     """.format(**locals()))
                 vpg = unique_string(desp='vpg')
@@ -390,10 +464,11 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 # Create output table. This will be updated whenever a group converges
                 # Note that vertex_id is assumed to be an integer (as described in
                 # documentation)
+
                 plpy.execute("""
                         CREATE TABLE {out_table} (
                             {grouping_cols_clause},
-                            {vertex_id} BIGINT,
+                            {vertex_id} {vertex_type},
                             pagerank DOUBLE PRECISION
                         )
                     """.format(**locals()))
@@ -475,14 +550,20 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 # involved.
                 if total_ppr_nodes > 0:
                     init_value = ppr_init_value_clause
+                    ppr_join = """
+                        LEFT JOIN {personalization_vertices_join}
+                            ON {vertex_table}.{vertex_id} = {psubq}.unnest_result
+                        """.format(**locals())
                 else:
                     init_value = 1.0 / n_vertices
-                plpy.execute("""
+                sql = """
                         CREATE TEMP TABLE {cur} AS
-                        SELECT {vertex_id}, {init_value}::DOUBLE PRECISION AS pagerank
+                        SELECT {vertex_id}, {init_value} AS pagerank
                         FROM {vertex_table}
+                        {ppr_join}
                         {cur_distribution}
-                    """.format(**locals()))
+                    """.format(**locals())
+                plpy.execute(sql)
 
                 # Compute the out-degree of every node in the graph.
                 plpy.execute("DROP TABLE IF EXISTS {0}".format(out_cnts))
@@ -535,19 +616,28 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
                 # the condition that will help skip the PageRank computation on groups
                 # that have converged.
 
+                ppr_group = ''
+                if personalization_vertices_join:
+                    ppr_join = """
+                        LEFT JOIN {personalization_vertices_join}
+                                ON {edge_temp_table}.{dest} = {psubq}.unnest_result
+                    """.format(**locals())
+                    ppr_group = ", {psubq}.unnest_result ".format(**locals())
+
                 plpy.execute("""
                         CREATE TABLE {message} AS
                         SELECT {grouping_cols_select_pr}
                                 {edge_temp_table}.{dest} AS {vertex_id},
                                 SUM(({v1}.pagerank)/{out_cnts}.{out_cnts_cnt})*{damping_factor}+
-                                {random_jump_prob} AS  pagerank
+                                {random_jump_prob} AS pagerank
                         FROM {edge_temp_table}
                             INNER JOIN {cur} ON {cur_join_clause}
                             INNER JOIN {out_cnts} ON {out_cnts_join_clause}
                             INNER JOIN {cur} AS {v1} ON {v1_join_clause}
+                            {ppr_join}
                             {vertices_per_group_inner_join_pr}
                         {ignore_group_clause}
-                        GROUP BY {grouping_cols_select_pr} {edge_temp_table}.{dest}
+                        GROUP BY {grouping_cols_select_pr} {edge_temp_table}.{dest} {ppr_group}
                         {cur_distribution}
                     """.format(ignore_group_clause=ignore_group_clause_pr
                                if iteration_num > 0 else ignore_group_clause_first,
@@ -688,6 +778,7 @@ def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args, out_
             plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2},{3},{4},{5},{6}
                 """.format(out_cnts, edge_temp_table, cur, message, cur_unconv,
                            message_unconv, nodes_with_no_incoming_edges))
+            plpy.execute("DROP VIEW IF EXISTS {0}, {1}".format(vertex_view, edge_view))
             if grouping_cols:
                 plpy.execute("""DROP TABLE IF EXISTS {0},{1},{2}
                     """.format(vertices_per_group, temp_summary_table,
@@ -713,30 +804,24 @@ def get_query_params_for_ppr(personalization_vertices, damping_factor,
              (Integer, String, String)
 
     """
-    total_ppr_nodes = 0
-    ppr_random_prob_clause = ''
-    ppr_init_prob_clause = ''
-
-    if personalization_vertices:
-        total_ppr_nodes = len(personalization_vertices)
-        ppr_init_value = 1.0 / total_ppr_nodes
-        prob_value = 1.0 - damping_factor
-        dest = edge_params["dest"]
-
-        personalization_vertices_str = ','.join([str(i) for i in personalization_vertices])
-
-        # In case of PPR, Assign the Random jump probability to the personalization_vertices only.
-        # For rest of the nodes, Random jump probability  will be zero.
-        ppr_random_prob_clause = """
-                CASE WHEN {edge_temp_table}.{dest} = ANY(ARRAY[{personalization_vertices_str}])
-                THEN {prob_value} ELSE 0 END
-            """.format(**locals())
-
-        ppr_init_prob_clause = """
-                CASE WHEN {vertex_id} = ANY(ARRAY[{personalization_vertices_str}])
-                THEN {ppr_init_value} ELSE 0 END
-            """.format(**locals())
-    return(total_ppr_nodes, ppr_random_prob_clause, ppr_init_prob_clause)
+
+    total_ppr_nodes = len(personalization_vertices)
+    ppr_init_value = 1.0 / total_ppr_nodes
+    prob_value = 1.0 - damping_factor
+    dest = edge_params["dest"]
+
+    # In case of PPR, Assign the Random jump probability to the personalization_vertices only.
+    # For rest of the nodes, Random jump probability  will be zero.
+    ppr_random_prob_clause = """
+            CASE WHEN unnest_result IS NOT NULL
+            THEN {prob_value} ELSE 0 END
+        """.format(**locals())
+
+    ppr_init_prob_clause = """
+            CASE WHEN unnest_result IS NOT NULL
+            THEN {ppr_init_value} ELSE 0 END
+        """.format(**locals())
+    return (total_ppr_nodes, ppr_random_prob_clause, ppr_init_prob_clause)
 
 
 def pagerank_help(schema_madlib, message, **kwargs):
diff --git a/src/ports/postgres/modules/graph/pagerank.sql_in b/src/ports/postgres/modules/graph/pagerank.sql_in
index 9adf8447..6bd87e76 100644
--- a/src/ports/postgres/modules/graph/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/pagerank.sql_in
@@ -76,10 +76,10 @@ pagerank( vertex_table,
 <dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the
 column specified in the 'vertex_id' parameter below.</dd>
 
-<dt>vertex_id</dt>
-<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+<dd>TEXT, default = 'id'. Name of the column(s) in 'vertex_table' containing
 vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
-They do not need to be contiguous.</dd>
+They do not need to be contiguous. If multiple columns are used as vertex ids,
+they are passed in the following format: [<vertex_id1>,<vertex_id2>,...]</dd>
 
 <dt>edge_table</dt>
 <dd>TEXT. Name of the table containing the edge data. The edge table must
@@ -89,9 +89,9 @@ contain columns for source vertex and destination vertex.</dd>
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table.
+  - src (INTEGER or BIGINT): Name of the column(s) containing the source vertex ids in the edge table.
                    Default column name is 'src'.
-  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table.
+  - dest (INTEGER or BIGINT): Name of the column(s) containing the destination vertex ids in the edge table.
                     Default column name is 'dest'.</dd>
 
 <dt>out_table</dt>
@@ -130,23 +130,27 @@ a single model is generated for all data.
 <dt> personalization_vertices (optional)</dt>
 <dd>INTEGER[] or BIGINT[], default: NULL. A comma separated list of vertices or nodes
 for personalized PageRank. When this parameter is provided, personalized PageRank
-will run.  In the absence of this parameter, regular PageRank will run.
+will run. In the absence of this parameter, regular PageRank will run. If multiple
+columns are used for identifying vertices, a 2D array will be required for this
+parameter.</dd>
 </dl>
 
 @anchor examples
 @examp
 
+<a href="example/madlib_pagerank_example.sql">Download the example sql file here.</a>
+
 -# Create vertex and edge tables to represent the graph:
 <pre class="syntax">
 DROP TABLE IF EXISTS vertex, edge;
 CREATE TABLE vertex(
-        id INTEGER
-        );
+    node_id INTEGER
+    );
 CREATE TABLE edge(
-        src INTEGER,
-        dest INTEGER,
-        user_id INTEGER
-        );
+    conn_src INTEGER,
+    conn_dest INTEGER,
+    user_id INTEGER
+);
 INSERT INTO vertex VALUES
 (0),
 (1),
@@ -184,23 +188,23 @@ INSERT INTO edge VALUES
 <pre class="syntax">
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT madlib.pagerank(
-                       'vertex',             -- Vertex table
-                       'id',                 -- Vertix id column
-                       'edge',               -- Edge table
-                       'src=src, dest=dest', -- Comma delimted string of edge arguments
-                       'pagerank_out');      -- Output table of PageRank
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out');                 -- Output table of PageRank
 SELECT * FROM pagerank_out ORDER BY pagerank DESC;
 </pre>
 <pre class="result">
- id |      pagerank
-----+-------------------
-  0 |  0.28753749341184
-  3 |  0.21016988901855
-  2 |  0.14662683454062
-  4 |  0.10289614384217
-  1 |  0.10289614384217
-  6 |  0.09728637768887
-  5 |  0.05258711765692
+ node_id |      pagerank
+---------+-------------------
+       0 |  0.28753749341184
+       3 |  0.21016988901855
+       2 |  0.14662683454062
+       4 |  0.10289614384217
+       1 |  0.10289614384217
+       6 |  0.09728637768887
+       5 |  0.05258711765692
 (7 rows)
 </pre>
 <pre class="syntax">
@@ -217,24 +221,24 @@ SELECT * FROM pagerank_out_summary;
 <pre class="syntax">
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT madlib.pagerank(
-                         'vertex',             -- Vertex table
-                         'id',                 -- Vertix id column
-                         'edge',               -- Edge table
-                         'src=src, dest=dest', -- Comma delimted string of edge arguments
-                         'pagerank_out',       -- Output table of PageRank
-                         0.5);                 -- Damping factor
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                       0.5);                            -- Damping factor
 SELECT * FROM pagerank_out ORDER BY pagerank DESC;
 </pre>
 <pre class="result">
- id |      pagerank
-----+--------------------
-  0 |  0.225477161441199
-  3 |  0.199090328586664
-  2 |  0.136261327206477
-  6 |  0.132691559968224
-  4 |  0.109009291409508
-  1 |  0.109009291409508
-  5 | 0.0884610399788161
+ node_id |      pagerank
+---------+--------------------
+       0 |  0.225477161441199
+       3 |  0.199090328586664
+       2 |  0.136261327206477
+       6 |  0.132691559968224
+       4 |  0.109009291409508
+       1 |  0.109009291409508
+       5 | 0.0884610399788161
 (7 rows)
 </pre>
 
@@ -243,34 +247,34 @@ using the grouping feature:
 <pre class="syntax">
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT madlib.pagerank(
-                         'vertex',             -- Vertex table
-                         'id',                 -- Vertix id column
-                         'edge',               -- Edge table
-                         'src=src, dest=dest', -- Comma delimted string of edge arguments
-                         'pagerank_out',       -- Output table of PageRank
-                         NULL,                 -- Default damping factor (0.85)
-                         NULL,                 -- Default max iters (100)
-                         0.00000001,           -- Threshold
-                         'user_id');           -- Grouping column name
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                       NULL,                            -- Default damping factor (0.85)
+                       NULL,                            -- Default max iters (100)
+                       0.00000001,                      -- Threshold
+                       'user_id');                      -- Grouping column name
 SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;
 </pre>
 <pre class="result">
- user_id | id |      pagerank
----------+----+--------------------
-       1 |  0 |  0.27825488388552
-       1 |  3 |  0.20188114667075
-       1 |  2 |  0.14288112346059
-       1 |  6 |  0.11453637832147
-       1 |  1 |  0.10026745615438
-       1 |  4 |  0.10026745615438
-       1 |  5 |  0.06191155535288
-       2 |  0 |  0.31854625004173
-       2 |  3 |  0.23786686773343
-       2 |  2 |  0.15914876489397
-       2 |  1 |  0.11168334437971
-       2 |  4 |  0.11168334437971
-       2 |  6 |  0.03964285714285
-       2 |  5 |  0.02142857142857
+ user_id | node_id |      pagerank
+---------+---------+--------------------
+       1 |       0 |  0.27825488388552
+       1 |       3 |  0.20188114667075
+       1 |       2 |  0.14288112346059
+       1 |       6 |  0.11453637832147
+       1 |       1 |  0.10026745615438
+       1 |       4 |  0.10026745615438
+       1 |       5 |  0.06191155535288
+       2 |       0 |  0.31854625004173
+       2 |       3 |  0.23786686773343
+       2 |       2 |  0.15914876489397
+       2 |       1 |  0.11168334437971
+       2 |       4 |  0.11168334437971
+       2 |       6 |  0.03964285714285
+       2 |       5 |  0.02142857142857
 (14 rows)
 </pre>
 <pre class="syntax">
@@ -290,28 +294,28 @@ could be specified as ARRAY[2,4] as well.
 <pre class="syntax">
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT madlib.pagerank(
-                       'vertex',             -- Vertex table
-                       'id',                 -- Vertix id column
-                       'edge',               -- Edge table
-                       'src=src, dest=dest', -- Comma delimted string of edge arguments
-                       'pagerank_out',       -- Output table of PageRank
-                        NULL,                -- Default damping factor (0.85)
-                        NULL,                -- Default max iters (100)
-                        NULL,                -- Default Threshold
-                        NULL,                -- No Grouping
-                       '{2,4}');             -- Personalization vertices
+                       'vertex',                        -- Vertex table
+                       'node_id',                       -- Vertex id column
+                       'edge',                          -- Edge table
+                       'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+                       'pagerank_out',                  -- Output table of PageRank
+                        NULL,                           -- Default damping factor (0.85)
+                        NULL,                           -- Default max iters (100)
+                        NULL,                           -- Default Threshold
+                        NULL,                           -- No Grouping
+                       '{2,4}');                        -- Personalization vertices
 SELECT * FROM pagerank_out ORDER BY pagerank DESC;
 </pre>
 <pre class="result">
- id |      pagerank
-----+--------------------
-  0 |  0.565232961966315
-  2 |  0.378139420991773
-  3 |  0.355003292266017
-  4 |  0.310111215897626
-  1 |  0.160111215897626
-  6 |  0.148615315574136
-  5 | 0.0803403307142321
+ node_id |      pagerank
+---------+--------------------
+       0 |  0.565232961966315
+       2 |  0.378139420991773
+       3 |  0.355003292266017
+       4 |  0.310111215897626
+       1 |  0.160111215897626
+       6 |  0.148615315574136
+       5 | 0.0803403307142321
 (7 rows)
 </pre>
 <pre class="syntax">
@@ -324,6 +328,102 @@ SELECT * FROM pagerank_out_summary;
 (1 row)
 </pre>
 
+-# Create vertex and edge tables with multiple column ids to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex_multicol_pagerank, edge_multicol_pagerank;
+CREATE TABLE vertex_multicol_pagerank(
+    node_id_major BIGINT,
+    node_id_minor BIGINT
+);
+CREATE TABLE edge_multicol_pagerank(
+    conn_src_major BIGINT,
+    conn_dest_major BIGINT,
+    user_id_major BIGINT,
+    conn_src_minor BIGINT,
+    conn_dest_minor BIGINT,
+    user_id_minor BIGINT
+);
+INSERT INTO vertex_multicol_pagerank VALUES
+(0, 0),
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 4),
+(5, 5),
+(6, 6);
+INSERT INTO edge_multicol_pagerank VALUES
+(0, 1, 1, 0, 1, 1),
+(0, 2, 1, 0, 2, 1),
+(0, 4, 1, 0, 4, 1),
+(1, 2, 1, 1, 2, 1),
+(1, 3, 1, 1, 3, 1),
+(2, 3, 1, 2, 3, 1),
+(2, 5, 1, 2, 5, 1),
+(2, 6, 1, 2, 6, 1),
+(3, 0, 1, 3, 0, 1),
+(4, 0, 1, 4, 0, 1),
+(5, 6, 1, 5, 6, 1),
+(6, 3, 1, 6, 3, 1),
+(0, 1, 2, 0, 1, 2),
+(0, 2, 2, 0, 2, 2),
+(0, 4, 2, 0, 4, 2),
+(1, 2, 2, 1, 2, 2),
+(1, 3, 2, 1, 3, 2),
+(2, 3, 2, 2, 3, 2),
+(3, 0, 2, 3, 0, 2),
+(4, 0, 2, 4, 0, 2),
+(5, 6, 2, 5, 6, 2),
+(6, 3, 2, 6, 3, 2);
+</pre>
+
+-# Personalized PageRank with multiple column ids. Here we specify {{2,2},{4,4}}
+as the personalization vertices. This parameter
+could be specified as ARRAY[[2,2],[4,4]] as well.
+<pre class="syntax">
+DROP TABLE IF EXISTS pagerank_multicol_out, pagerank_multicol_out_summary;
+SELECT madlib.pagerank(
+                       'vertex_multicol_pagerank',                                                      -- Vertex table
+                       '[node_id_major,node_id_minor]',                                                 -- Vertex id column
+                       'edge_multicol_pagerank',                                                        -- Edge table
+                       'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]',   -- Comma delimited string of edge arguments
+                       'pagerank_multicol_out',                                                         -- Output table of PageRank
+                        NULL,                                                                           -- Default damping factor (0.85)
+                        NULL,                                                                           -- Default max iters (100)
+                        NULL,                                                                           -- Default Threshold
+                       'user_id_major,user_id_minor',                                                   -- Grouping Columns
+                       '{{2,2},{4,4}}');                                                                -- Personalization vertices
+SELECT * FROM pagerank_multicol_out ORDER BY pagerank DESC;
+</pre>
+<pre class="result">
+ user_id_major | user_id_minor |  id   |      pagerank
+---------------+---------------+-------+--------------------
+             2 |             2 | {0,0} |  0.448826703440932
+             2 |             2 | {3,3} |  0.325943770128465
+             1 |             1 | {0,0} |  0.270635964385879
+             2 |             2 | {2,2} |  0.256179815391031
+             2 |             2 | {4,4} |  0.202149921235622
+             1 |             1 | {2,2} |   0.18423239851445
+             1 |             1 | {3,3} |  0.166801820206414
+             1 |             1 | {4,4} |  0.151661035568349
+             2 |             2 | {1,1} |  0.127149921235622
+             1 |             1 | {6,6} | 0.0965411872854988
+             1 |             1 | {1,1} | 0.0766610355683489
+             2 |             2 | {5,5} |              0.075
+             2 |             2 | {6,6} |            0.06375
+             1 |             1 | {5,5} | 0.0521896024086525
+(14 rows)
+</pre>
+<pre class="syntax">
+SELECT * FROM pagerank_multicol_out_summary;
+</pre>
+<pre class="result">
+ user_id_major | user_id_minor | __iterations__
+---------------+---------------+----------------
+             2 |             2 |             45
+             1 |             1 |             41
+(2 rows)
+</pre>
+
 @anchor notes
 @par Notes
 
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
index 870ed008..e11b269f 100644
--- a/src/ports/postgres/modules/graph/test/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -64,7 +64,7 @@ INSERT INTO "EDGE" VALUES
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT pagerank(
              'vertex',        -- Vertex table
-             'id',            -- Vertix id column
+             'id',            -- Vertex id column
              '"EDGE"',          -- "EDGE" table
              'src=src, dest=dest', -- "EDGE" args
              'pagerank_out');    -- Output table of PageRank
@@ -80,7 +80,7 @@ DROP TABLE IF EXISTS pagerank_gr_out;
 DROP TABLE IF EXISTS pagerank_gr_out_summary;
 SELECT pagerank(
              'vertex',        -- Vertex table
-             'id',            -- Vertix id column
+             'id',            -- Vertex id column
              '"EDGE"',          -- "EDGE" table
              'src=src, dest=dest', -- "EDGE" args
              'pagerank_gr_out', -- Output table of PageRank
@@ -113,7 +113,7 @@ DROP TABLE IF EXISTS pagerank_ppr_out;
 DROP TABLE IF EXISTS pagerank_ppr_out_summary;
 SELECT pagerank(
              'vertex',        -- Vertex table
-             'id',            -- Vertix id column
+             'id',            -- Vertex id column
              '"EDGE"',          -- "EDGE" table
              'src=src, dest=dest', -- "EDGE" args
              'pagerank_ppr_out', -- Output table of PageRank
@@ -133,7 +133,7 @@ DROP TABLE IF EXISTS pagerank_ppr_grp_out;
 DROP TABLE IF EXISTS pagerank_ppr_grp_out_summary;
 SELECT pagerank(
              'vertex',        -- Vertex table
-             'id',            -- Vertix id column
+             'id',            -- Vertex id column
              '"EDGE"',          -- "EDGE" table
              'src=src, dest=dest', -- "EDGE" args
              'pagerank_ppr_grp_out', -- Output table of PageRank
@@ -180,7 +180,7 @@ DROP TABLE IF EXISTS pagerank_gr_out;
 DROP TABLE IF EXISTS pagerank_gr_out_summary;
 SELECT pagerank(
 'vertex', -- Vertex table
-'id', -- Vertix id column
+'id', -- Vertex id column
 '"EDGE"', -- "EDGE" table
 'src=src, dest=dest', -- "EDGE" args
 'pagerank_gr_out', -- Output table of PageRank
@@ -217,3 +217,91 @@ DROP TABLE IF EXISTS pg_temp.out2, pg_temp.out2_summary;
 SELECT pagerank('v2',NULL,'e2',NULL,'pg_temp.out2');
 SELECT count(*) from pg_temp.out2;
 SELECT * from pg_temp.out2_summary;
+
+-- Test for multiple column identifiers
+-- The datasets have the columns doubled so that the same tests can be run on the output tables
+
+DROP TABLE IF EXISTS vertex_mult_pagerank, edge_mult_pagerank CASCADE;
+CREATE TABLE vertex_mult_pagerank(
+id1 INTEGER,
+id2 INTEGER
+);
+CREATE TABLE edge_mult_pagerank(
+src1 INTEGER,
+dest1 INTEGER,
+user_id1 INTEGER,
+src2 INTEGER,
+dest2 INTEGER,
+user_id2 INTEGER
+);
+INSERT INTO vertex_mult_pagerank VALUES
+(0, 0),
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 4),
+(5, 5),
+(6, 6);
+INSERT INTO edge_mult_pagerank VALUES
+(0, 1, 1, 0, 1, 1),
+(0, 2, 1, 0, 2, 1),
+(0, 4, 1, 0, 4, 1),
+(1, 2, 1, 1, 2, 1),
+(1, 3, 1, 1, 3, 1),
+(2, 3, 1, 2, 3, 1),
+(2, 5, 1, 2, 5, 1),
+(2, 6, 1, 2, 6, 1),
+(3, 0, 1, 3, 0, 1),
+(4, 0, 1, 4, 0, 1),
+(5, 6, 1, 5, 6, 1),
+(6, 3, 1, 6, 3, 1),
+(0, 1, 2, 0, 1, 2),
+(0, 2, 2, 0, 2, 2),
+(0, 4, 2, 0, 4, 2),
+(1, 2, 2, 1, 2, 2),
+(1, 3, 2, 1, 3, 2),
+(2, 3, 2, 2, 3, 2),
+(3, 0, 2, 3, 0, 2),
+(4, 0, 2, 4, 0, 2),
+(5, 6, 2, 5, 6, 2),
+(6, 3, 2, 6, 3, 2);
+
+DROP TABLE IF EXISTS pagerank_mult_col_out;
+DROP TABLE IF EXISTS pagerank_mult_col_out_summary;
+SELECT pagerank(
+'vertex_mult_pagerank', -- Vertex table
+'[id1,id2]', -- Vertex id column
+'edge_mult_pagerank', -- edge_mult_pagerank table
+'src=[src1,src2], dest=[dest1,dest2]', -- edge_mult_pagerank args
+'pagerank_mult_col_out', -- Output table of PageRank
+NULL, -- Default damping factor (0.85)
+NULL, -- Default max iters (100)
+NULL, -- Default Threshold
+'user_id1,user_id2');
+SELECT assert(relative_error(SUM(pagerank), 1) < 0.005,
+        'PageRank: Scores do not sum up to 1 for group 1.'
+    ) FROM pagerank_mult_col_out WHERE user_id1=1;
+SELECT assert(id = '{0,0}'::BIGINT[], 'Unexpected Ranking') FROM (
+        SELECT id FROM pagerank_mult_col_out
+        WHERE user_id1 = 1 ORDER BY pagerank DESC LIMIT 1)q;
+
+DROP TABLE IF EXISTS pagerank_mult_col_out;
+DROP TABLE IF EXISTS pagerank_mult_col_out_summary;
+SELECT pagerank(
+'vertex_mult_pagerank', -- Vertex table
+'[id1,id2]', -- Vertex id column
+'edge_mult_pagerank', -- edge_mult_pagerank table
+'src=[src1,src2], dest=[dest1,dest2]', -- edge_mult_pagerank args
+'pagerank_mult_col_out', -- Output table of PageRank
+NULL, -- Default damping factor (0.85)
+NULL, -- Default max iters (100)
+NULL, -- Default Threshold
+'user_id1,user_id2',
+'{{1,1},{3,3}}');
+
+SELECT assert(relative_error(SUM(pagerank), 1) < 0.005,
+        'PageRank: Scores do not sum up to 1 for group 1.'
+    ) FROM pagerank_mult_col_out WHERE user_id1=1;
+SELECT assert(id = '{0,0}'::BIGINT[], 'Unexpected Ranking') FROM (
+        SELECT id FROM pagerank_mult_col_out
+        WHERE user_id1 = 1 ORDER BY pagerank DESC LIMIT 1)q;
diff --git a/src/ports/postgres/modules/graph/test/wcc.sql_in b/src/ports/postgres/modules/graph/test/wcc.sql_in
index e1dd7b59..f7af6868 100644
--- a/src/ports/postgres/modules/graph/test/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/test/wcc.sql_in
@@ -135,20 +135,20 @@ SELECT assert(relative_error(num_vertices, 4) < 0.00001,
 DROP TABLE IF EXISTS vc_table;
 SELECT graph_wcc_vertex_check(
      'wcc_out',    -- WCC's output table
-     '14,15',      -- Pair of vertex IDs
+     ARRAY[14,15], -- Pair of vertex IDs
      'vc_table');  -- output table
-SELECT assert(relative_error(component_id, 14) < 0.00001,
+SELECT assert(relative_error(count(DISTINCT component_id), 1) < 0.00001,
         'Weakly Connected Components: Incorrect vertex check value.'
     ) FROM vc_table WHERE user_id=1;
 
 DROP TABLE IF EXISTS reach_table;
 SELECT graph_wcc_reachable_vertices(
      'wcc_out',         -- WCC's output table
-     '0',               -- source vertex
+     0,               -- source vertex
      'reach_table');    -- output table
 SELECT assert(relative_error(count(dest), 5) < 0.00001,
         'Weakly Connected Components: Incorrect reachable vertices value.'
-    ) FROM reach_table WHERE user_id=2 and component_id=0;
+    ) FROM reach_table WHERE user_id=2;
 
 DROP TABLE IF EXISTS count_table;
 SELECT graph_wcc_num_cpts(
@@ -183,3 +183,96 @@ CREATE TABLE e2 AS SELECT (src_node+992147483647)::bigint as src, (dest_node+992
 SELECT weakly_connected_components('v2',NULL,'e2',NULL,'pg_temp.wcc_out');
 SELECT count(*) from pg_temp.wcc_out;
 SELECT count(*) from pg_temp.wcc_out_summary;
+
+-- Test for multiple column identifiers
+-- Every id and grouping column is duplicated, so the expected values are the same as in the single column tests
+
+DROP TABLE IF EXISTS vertex_mult, edge_mult CASCADE;
+CREATE TABLE vertex_mult AS SELECT id AS id1, id AS id2 FROM vertex;
+CREATE TABLE edge_mult AS
+SELECT src_node AS src1, src_node AS src2,
+       dest_node AS dest1, dest_node AS dest2,
+       user_id AS user_id1, user_id AS user_id2
+FROM "EDGE"
+WHERE user_id = 1;
+
+DROP TABLE IF EXISTS wcc_mult_out CASCADE;
+DROP TABLE IF EXISTS wcc_mult_out_summary CASCADE;
+SELECT weakly_connected_components(
+    'vertex_mult',                          -- Vertex table
+    '[id1,id2]',                            -- Vertex id columns
+    'edge_mult',                            -- Edge table
+    'src=[src1,src2], dest=[dest1,dest2]',  -- Edge arguments
+    'wcc_mult_out');                        -- Output table
+-- Ungrouped run: check the table produced by the call above.
+SELECT assert(relative_error(count(distinct component_id), 4) < 0.00001,
+        'Weakly Connected Components: Number of components found is not 4.'
+    ) FROM wcc_mult_out;
+
+INSERT INTO edge_mult (src1, src2, dest1, dest2, user_id1, user_id2)
+SELECT src_node AS src1, src_node AS src2,
+       dest_node AS dest1, dest_node AS dest2,
+       user_id AS user_id1, user_id AS user_id2
+FROM "EDGE"
+WHERE user_id = 2;
+
+DROP TABLE IF EXISTS wcc_mult_out CASCADE;
+DROP TABLE IF EXISTS wcc_mult_out_summary CASCADE;
+SELECT weakly_connected_components(
+    'vertex_mult',                          -- Vertex table
+    '[id1,id2]',                            -- Vertex id columns
+    'edge_mult',                            -- Edge table
+    'src=[src1,src2], dest=[dest1,dest2]',  -- Edge arguments
+    'wcc_mult_out',                         -- Output table
+    'user_id1,user_id2');                   -- Grouping columns
+-- Grouped run: filter on the first grouping column.
+SELECT assert(relative_error(count(distinct component_id), 3) < 0.00001,
+        'Weakly Connected Components: Number of components found is not 3.'
+    ) FROM wcc_mult_out WHERE user_id1=1;
+-- Same check through the second grouping column (a copy of the first one).
+SELECT assert(relative_error(count(distinct component_id), 3) < 0.00001,
+        'Weakly Connected Components: Number of components found is not 3.'
+    ) FROM wcc_mult_out WHERE user_id2=1;
+
+-- Test WCC helper functions on the multi column output table:
+DROP TABLE IF EXISTS largest_cpt_table;
+SELECT graph_wcc_largest_cpt(
+     'wcc_mult_out',        -- WCC's output table
+     'largest_cpt_table');  -- output table
+SELECT assert(relative_error(num_vertices, 6) < 0.00001,
+        'Weakly Connected Components: Incorrect largest component value.'
+    ) FROM largest_cpt_table WHERE user_id1=2;
+
+DROP TABLE IF EXISTS histogram_table;
+SELECT graph_wcc_histogram(
+     'wcc_mult_out',      -- WCC's output table
+     'histogram_table');  -- output table
+SELECT assert(array_agg(num_vertices order by num_vertices asc)= '{3, 4, 6}',
+        'Weakly Connected Components: Incorrect histogram value.'
+    ) FROM histogram_table WHERE user_id1=1;
+
+DROP TABLE IF EXISTS vc_table;
+SELECT graph_wcc_vertex_check(
+     'wcc_mult_out',                 -- WCC's output table
+     '{{14,14},{15,15}}'::BIGINT[],  -- Pair of vertex IDs (one inner array per vertex)
+     'vc_table');                    -- output table
+SELECT assert(relative_error(count(DISTINCT component_id), 1) < 0.00001,
+        'Weakly Connected Components: Incorrect vertex check value.'
+    ) FROM vc_table WHERE user_id1=1;
+
+DROP TABLE IF EXISTS reach_table;
+SELECT graph_wcc_reachable_vertices(
+     'wcc_mult_out',     -- WCC's output table
+     '{0,0}'::BIGINT[],  -- source vertex
+     'reach_table');     -- output table
+SELECT assert(relative_error(count(dest), 5) < 0.00001,
+        'Weakly Connected Components: Incorrect reachable vertices value.'
+    ) FROM reach_table WHERE user_id1=2;
+
+DROP TABLE IF EXISTS count_table;
+SELECT graph_wcc_num_cpts(
+     'wcc_mult_out',  -- WCC's output table
+     'count_table');  -- output table
+SELECT assert(relative_error(num_components, 3) < 0.00001,
+        'Weakly Connected Components: Incorrect number of components value.'
+    ) FROM count_table WHERE user_id1=1;
diff --git a/src/ports/postgres/modules/graph/wcc.py_in b/src/ports/postgres/modules/graph/wcc.py_in
index 0c80aaba..fe0d8770 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -36,6 +36,7 @@ from utilities.utilities import extract_keyvalue_params
 from utilities.utilities import unique_string, split_quoted_delimited_str
 from utilities.validate_args import columns_exist_in_table, get_expr_type
 from utilities.utilities import is_platform_pg
+from utilities.utilities import get_seg_number
 from utilities.utilities import add_postfix
 from utilities.validate_args import table_exists
 from utilities.utilities import rename_table
@@ -74,26 +75,85 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
         @param out_table
         @param grouping_cols
     """
+
+    vertex_table_in = vertex_table
+    vertex_id_in = vertex_id
+
     old_msg_level = plpy.execute("""
                                   SELECT setting
                                   FROM pg_settings
                                   WHERE name='client_min_messages'
                                   """)[0]['setting']
     plpy.execute('SET client_min_messages TO warning')
-    params_types = {'src': str, 'dest': str}
-    default_args = {'src': 'src', 'dest': 'dest'}
+    params_types = {'src': list, 'dest': list}
+    default_args = {'src': ['src'], 'dest': ['dest']}
     edge_params = extract_keyvalue_params(
         edge_args, params_types, default_args)
 
     # populate default values for optional params if null, and prepare data
     # to be written into the summary table (*_st variable names)
+    vertex_view = unique_string('vertex_view')
+    edge_view = unique_string('edge_view')
+    single_id = 'single_id'
+    vertex_view_sql = """
+        CREATE VIEW {vertex_view} AS
+        SELECT {vertex_sql} AS id, {vertex_sql} AS {single_id}
+        FROM {vertex_table}
+        """
     if not vertex_id:
         vertex_id = "id"
-        v_st = "id"
+        vertex_sql = vertex_id
+        vertex_type = "BIGINT"
     else:
-        v_st = vertex_id
+        if vertex_id[0] == '[' and vertex_id[-1] == ']':
+            vertex_id = split_quoted_delimited_str(vertex_id[1:-1])
+            vertex_sql = "ARRAY[{0}]".format(','.join(vertex_id))
+            vertex_type = "BIGINT[]"
+
+            if is_platform_pg():
+                num_segments = 1
+                seg_sql = ' 0 '
+            else:
+                num_segments = get_seg_number()
+                seg_sql = " gp_segment_id "
+            vertex_view_sql = """
+                CREATE VIEW {vertex_view} AS
+                WITH q1 AS (
+                    SELECT {vertex_sql} AS id,
+                        ctid AS ctid_in,
+                        {seg_sql} AS seg_id_in,
+                        CAST( regexp_matches(ctid::TEXT, '\\((\\d+),(\\d+)\\)') AS BIGINT[]) AS new_id
+                    FROM {vertex_table}),
+                q2 AS (SELECT MAX(new_id[1]) AS max_block FROM q1)
+                SELECT id, ctid_in, {num_segments}*(new_id[2]*(max_block+1)+new_id[1])+seg_id_in AS {single_id}
+                FROM q1, q2;
+            """
+
+        else:
+            vertex_sql = vertex_id
+            vertex_id = [vertex_id]
+            vertex_type = "BIGINT"
+
+
+    src_list = edge_params["src"]
+    if len(src_list) > 1:
+        src = "ARRAY[{0}]".format(','.join(edge_params["src"]))
+    else:
+        edge_params["src"] = edge_params["src"][0]
+        src = edge_params["src"]
+
+    dest_list = edge_params["dest"]
+    if len(dest_list) > 1:
+        dest = "ARRAY[{0}]".format(','.join(edge_params["dest"]))
+    else:
+        edge_params["dest"] = edge_params["dest"][0]
+        dest = edge_params["dest"]
+
     if not grouping_cols:
         grouping_cols = ''
+        grouping_sql = ''
+    else:
+        grouping_sql = ', {0}'.format(grouping_cols)
 
     out_table_summary = ''
     if out_table:
@@ -102,8 +162,24 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
     validate_wcc_args(schema_madlib, vertex_table, vertex_id, edge_table,
                       edge_params, out_table, out_table_summary,
                       grouping_cols_list, 'Weakly Connected Components')
-    src = edge_params["src"]
-    dest = edge_params["dest"]
+
+    vertex_view_sql = vertex_view_sql.format(**locals())
+    plpy.execute(vertex_view_sql)
+
+    sql = """
+        CREATE VIEW {edge_view} AS
+        SELECT {src} AS src, {dest} AS dest {grouping_sql}
+        FROM {edge_table}
+        """.format(**locals())
+    plpy.execute(sql)
+
+    vertex_table = vertex_view
+    edge_table = edge_view
+    vertex_id = 'id'
+    src = 'src'
+    dest = 'dest'
+
+    distribution = '' if is_platform_pg() else "DISTRIBUTED BY (id)"
 
     message = unique_string(desp='message')
     oldupdate = unique_string(desp='oldupdate')
@@ -112,8 +188,6 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
     temp_out_table = unique_string(desp='tempout')
     edge_inverse = unique_string(desp='edge_inverse')
 
-    distribution = '' if is_platform_pg() else \
-        "DISTRIBUTED BY ({0})".format(vertex_id)
     subq_prefixed_grouping_cols = ''
     comma_toupdate_prefixed_grouping_cols = ''
     comma_oldupdate_prefixed_grouping_cols = ''
@@ -192,10 +266,11 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
 
         message_sql = """
             CREATE TABLE {message} AS
-            SELECT {vertex_id},
-                    CAST({vertex_id} AS BIGINT) AS {component_id}
+            SELECT {vertex_table}.{vertex_id},
+                    CAST({vertex_table}.{single_id} AS BIGINT) AS {component_id}
                     {comma_grouping_cols}
-            FROM {newupdate}
+            FROM {newupdate} INNER JOIN {vertex_table}
+            ON {vertex_table}.{vertex_id} = {newupdate}.{vertex_id}
             {distribution};
         """
         plpy.execute(message_sql.format(**locals()))
@@ -207,7 +282,7 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
             {distribution};
 
             CREATE TABLE {message} AS
-            SELECT {vertex_id}, CAST({vertex_id} AS BIGINT) AS {component_id}
+            SELECT {vertex_id}, CAST({single_id} AS BIGINT) AS {component_id}
             FROM {vertex_table}
             {distribution};
         """
@@ -312,22 +387,25 @@ def wcc(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
         plpy.execute("DROP TABLE IF EXISTS {0}".format(edge_inverse))
 
     rename_table(schema_madlib, newupdate, out_table)
+    if vertex_type != "BIGINT[]" and vertex_id_in and vertex_id_in != 'id':
+        plpy.execute("ALTER TABLE {out_table} RENAME COLUMN id TO {vertex_id_in}".format(**locals()))
     # Create summary table. We only need the vertex_id and grouping columns
     # in it.
-    vertex_id_type = get_expr_type(vertex_id, vertex_table)
 
+    plpy.execute("DROP VIEW IF EXISTS {0}, {1}".format(vertex_view, edge_view))
     plpy.execute("""
         CREATE TABLE {out_table_summary} AS SELECT
             {grouping_cols_summary}
-            '{vertex_table}'::TEXT AS vertex_table,
-            '{vertex_id}'::TEXT AS vertex_id,
-            '{vertex_id_type}'::TEXT AS vertex_id_type;
+            '{vertex_table_in}'::TEXT AS vertex_table,
+            '{vertex_id_in}'::TEXT AS vertex_id,
+            '{vertex_type}'::TEXT AS vertex_id_type;
 
         DROP TABLE IF EXISTS {message},{oldupdate},{newupdate},{toupdate};
     """.format(grouping_cols_summary='' if not grouping_cols else
                     "'{0}'::TEXT AS grouping_cols, ".format(grouping_cols),
                **locals()))
 
+
 # WCC Helper functions:
 def extract_wcc_summary_cols(wcc_summary_table):
     """
@@ -381,6 +459,43 @@ def check_input_vertex_validity(wcc_args, vertices):
     _assert(count == len(vertices),
             "Graph WCC: Invalid input vertex in {0}.".format(str(vertices)))
 
+def check_input_mcol_vertex_validity(schema_madlib, wcc_args, vertices, vertex_str_list):
+    """
+    Assert that every multi-column vertex given by the caller is present in
+    the original input vertex table named in wcc_args.
+    Args:
+        @param schema_madlib (str): schema prefix for array_unnest_2d_to_1d
+        @param wcc_args (dict): read for 'vertex_table' and 'vertex_id'
+        @param vertices (list of list): its length is the expected match count
+        @param vertex_str_list (list of str): the vertices as array literals
+    Returns:
+        None. _assert is called with an error message on any mismatch.
+    """
+    vertex_table = wcc_args['vertex_table']
+    _assert(table_exists(vertex_table),
+            "Graph WCC: Input vertex table '{0}' does not exist.".format(
+                vertex_table))
+    vertex_col = wcc_args['vertex_id']
+
+    psubq = unique_string(desp='psubquery')
+    vertex_str = ','.join(vertex_str_list)
+
+    vertex_join = """
+        (SELECT ({schema_madlib}.array_unnest_2d_to_1d('{{ {vertex_str} }}'::BIGINT[])).unnest_result
+        ) {psubq}
+        """.format(**locals())
+
+    sql = """
+            SELECT COUNT(*) as count
+            FROM
+                {vertex_table}
+                INNER JOIN {vertex_join}
+            ON (ARRAY{vertex_col}::BIGINT[] = unnest_result)
+        """.format(**locals())
+    count = plpy.execute(sql)[0]['count']
+    _assert(count == len(vertices),
+            "Graph WCC: Invalid input vertex in {0}.".format(str(vertices)))
+
 def create_component_cnts_table(wcc_table, cnts_out_table,
                                 grouping_cols_comma):
     """
@@ -503,11 +618,20 @@ def graph_wcc_vertex_check(schema_madlib,  wcc_table, vertex_pair, pair_table,
     """
     with MinWarning("warning"):
         wcc_args = preprocess_wcc_table_args(wcc_table, pair_table)
-        vertices = split_quoted_delimited_str(vertex_pair)
-        _assert(vertices and len(vertices) == 2,
+
+        _assert(vertex_pair and len(vertex_pair) == 2,
                 "Graph WCC: Invalid vertex pair ({0}) input.".format(
                     vertex_pair))
-        check_input_vertex_validity(wcc_args, vertices)
+
+        if isinstance(vertex_pair[0], list):
+            vertex_str_list = []
+            for i in vertex_pair:
+                vertex_str_list.append('{' + ', '.join([str(j) for j in i]) + '}')
+            check_input_mcol_vertex_validity(schema_madlib, wcc_args, vertex_pair, vertex_str_list)
+        else:
+            check_input_vertex_validity(wcc_args, vertex_pair)
+            vertex_str_list = vertex_pair
+
         grouping_cols_comma = ''
         if 'grouping_cols' in wcc_args:
             grouping_cols_comma = wcc_args['grouping_cols'] + ', '
@@ -517,7 +641,12 @@ def graph_wcc_vertex_check(schema_madlib,  wcc_table, vertex_pair, pair_table,
         inner_from_clause = " FROM {0} ".format(wcc_table)
         inner_groupby_clause = " GROUP BY {0} component_id".format(
             grouping_cols_comma)
-        plpy.execute("""
+
+        vertex_id = wcc_args['vertex_id']
+        if vertex_id[0] == '[' and vertex_id[-1] == ']':
+            vertex_id = 'id'
+
+        sql = """
                 CREATE TABLE {pair_table} AS
                 SELECT {grouping_cols_comma} component_id
                 FROM (
@@ -533,9 +662,8 @@ def graph_wcc_vertex_check(schema_madlib,  wcc_table, vertex_pair, pair_table,
                 ) {subq}
                 GROUP BY {grouping_cols_comma} component_id
                 HAVING COUNT(*)=2
-            """.format(vertex_id=wcc_args['vertex_id'],
-                       vertex1=vertices[0], vertex2=vertices[1], **locals()))
-
+            """.format(vertex1=vertex_str_list[0], vertex2=vertex_str_list[1], **locals())
+        plpy.execute(sql)
 
 def graph_wcc_reachable_vertices(schema_madlib, wcc_table, src,
                                  reachable_vertices_table, **kwargs):
@@ -555,13 +683,23 @@ def graph_wcc_reachable_vertices(schema_madlib, wcc_table, src,
     with MinWarning("warning"):
         wcc_args = preprocess_wcc_table_args(wcc_table,
                                              reachable_vertices_table)
-        check_input_vertex_validity(wcc_args, split_quoted_delimited_str(src))
+        if not isinstance(src, list):
+            vertex_str = str(src)
+            check_input_vertex_validity(wcc_args, [vertex_str])
+        else:
+            vertex_str = '{' + ', '.join([str(j) for j in src]) + '}'
+            check_input_mcol_vertex_validity(schema_madlib, wcc_args, [src], [vertex_str])
+
         grouping_cols_comma = ''
         grouping_cols = ''
         if 'grouping_cols' in wcc_args:
             grouping_cols = wcc_args['grouping_cols']
             grouping_cols_comma = grouping_cols + ', '
+
         vertex_id = wcc_args['vertex_id']
+        if vertex_id[0] == '[' and vertex_id[-1] == ']':
+            vertex_id = 'id'
+
         subq = unique_string(desp='subq')
         glist = split_quoted_delimited_str(grouping_cols)
         grouping_cols_join = '' if not grouping_cols else ' AND ' + \
@@ -577,11 +715,11 @@ def graph_wcc_reachable_vertices(schema_madlib, wcc_table, src,
                     SELECT {grouping_cols_comma} component_id, {vertex_id}
                     FROM {wcc_table}
                     GROUP BY {vertex_id}, {grouping_cols_comma} component_id
-                    HAVING {vertex_id}='{src}'
+                    HAVING {vertex_id} ='{vertex_str}'
                 ) {subq}
                 ON {wcc_table}.component_id={subq}.component_id
                     {grouping_cols_join}
-                WHERE {wcc_table}.{vertex_id} != '{src}'
+                WHERE {wcc_table}.{vertex_id} != '{vertex_str}'
             """.format(**locals()))
 
 
diff --git a/src/ports/postgres/modules/graph/wcc.sql_in b/src/ports/postgres/modules/graph/wcc.sql_in
index 8d4cb87c..26a8a8d6 100644
--- a/src/ports/postgres/modules/graph/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/wcc.sql_in
@@ -72,9 +72,10 @@ weakly_connected_components( vertex_table,
 column specified in the 'vertex_id' parameter below.</dd>
 
 <dt>vertex_id</dt>
-<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+<dd>TEXT, default = 'id'. Name of the column(s) in 'vertex_table' containing
 vertex ids.  The vertex ids can be of type INTEGER or BIGINT with no duplicates.
-They do not need to be contiguous.</dd>
+They do not need to be contiguous. If multiple columns are used as vertex ids,
+they are passed in the following format: [<vertex_id1>,<vertex_id2>,...]</dd>
 
 <dt>edge_table</dt>
 <dd>TEXT. Name of the table containing the edge data. The edge table must
@@ -84,14 +85,16 @@ contain columns for source vertex and destination vertex.</dd>
 <dd>TEXT. A comma-delimited string containing multiple named arguments of
 the form "name=value". The following parameters are supported for
 this string argument:
-  - src (INTEGER or BIGINT): Name of the column containing the source vertex ids in the edge table. Default column name is 'src'.
-  - dest (INTEGER or BIGINT): Name of the column containing the destination vertex ids in the edge table. Default column name is 'dest'.</dd>
+  - src (INTEGER or BIGINT): Name of the column(s) containing the source vertex ids in the edge table. Default column name is 'src'. Multiple columns are passed in brackets, e.g. src=[src1,src2].
+  - dest (INTEGER or BIGINT): Name of the column(s) containing the destination vertex ids in the edge table. Default column name is 'dest'. Multiple columns are passed in brackets, e.g. dest=[dest1,dest2].</dd>
 
 <dt>out_table</dt>
 <dd>TEXT. Name of the table to store the component ID associated with each vertex.
 It will contain a row for every vertex from 'vertex_table' with
 the following columns:
-  - vertex_id : The id of a vertex. Will use the input parameter 'vertex_id' for column naming.
+  - vertex_id : The id of a vertex. Will use the input parameter 'vertex_id'
+  for column naming. If multiple columns are used for identifying vertices,
+  this column will be an array named "id".
   - component_id : Component that the vertex belongs to.
   We use the convention where 'component_id' is the id of
   the first vertex in a particular group.  It means that component ids
@@ -207,7 +210,9 @@ graph_wcc_vertex_check( wcc_table,
 components.</dd>
 
 <dt>vertex_pair</dt>
-<dd>TEXT. A pair of vertex IDs separated by a comma.</dd>
+<dd>BIGINT[]. An array containing the pair of vertex IDs to check. If multiple
+columns are used for identifying vertices, a 2D array will be required for this
+parameter, with one inner array per vertex (e.g. '{{14,14},{15,15}}').</dd>
 
 <dt>pair_table</dt>
 <dd>TEXT. Name of the output table that specifies if the two vertices in
@@ -241,7 +246,7 @@ graph_wcc_reachable_vertices( wcc_table,
 components.</dd>
 
 <dt>src</dt>
-<dd>TEXT. The vertex ID from which all reachable vertices have to be found.</dd>
+<dd>BIGINT or BIGINT[]. The vertex ID from which all reachable vertices have to be found. Use BIGINT[] when multiple columns are used for identifying vertices.</dd>
 
 <dt>reachable_vertices_table</dt>
 <dd>TEXT. Name of the output table that contains the list of vertices that are
@@ -289,15 +294,17 @@ table has the following columns:
 @anchor examples
 @examp
 
+<a href="example/madlib_wcc_example.sql">Download the example sql file here.</a>
+
 -# Create vertex and edge tables to represent the graph:
 <pre class="syntax">
 DROP TABLE IF EXISTS vertex, edge;
 CREATE TABLE vertex(
-    id INTEGER
+    node_id INTEGER
 );
 CREATE TABLE edge(
-    src INTEGER,
-    dest INTEGER,
+    conn_src INTEGER,
+    conn_dest INTEGER,
     user_id INTEGER
 );
 INSERT INTO vertex VALUES
@@ -340,30 +347,30 @@ INSERT INTO edge VALUES
 <pre class="syntax">
 DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
 SELECT madlib.weakly_connected_components(
-                         'vertex',             -- Vertex table
-                         'id',                 -- Vertix id column
-                         'edge',               -- Edge table
-                         'src=src, dest=dest', -- Comma delimted string of edge arguments
-                         'wcc_out');      -- Output table of weakly connected components
+    'vertex',                        -- Vertex table
+    'node_id',                       -- Vertex id column
+    'edge',                          -- Edge table
+    'src=conn_src, dest=conn_dest',  -- Comma delimited string of edge arguments
+    'wcc_out');                      -- Output table of weakly connected components
 SELECT * FROM wcc_out ORDER BY component_id, id;
 </pre>
 <pre class="result">
- id | component_id
-----+--------------
-  0 |            0
-  1 |            0
-  2 |            0
-  3 |            0
-  5 |            0
-  6 |            0
-  4 |            4
- 10 |           10
- 11 |           10
- 12 |           10
- 13 |           10
- 14 |           14
- 15 |           14
- 16 |           14
+ node_id | component_id
+---------+--------------
+       0 |            0
+       1 |            0
+       2 |            0
+       3 |            0
+       5 |            0
+       6 |            0
+       4 |            4
+      10 |           10
+      11 |           10
+      12 |           10
+      13 |           10
+      14 |           14
+      15 |           14
+      16 |           14
 (14 rows)
 </pre>
 
@@ -372,30 +379,30 @@ using the grouping feature:
 <pre class="syntax">
 DROP TABLE IF EXISTS wcc_out, wcc_out_summary;
 SELECT madlib.weakly_connected_components(
-                         'vertex',             -- Vertex table
-                         'id',                 -- Vertix id column
-                         'edge',               -- Edge table
-                         'src=src, dest=dest', -- Comma delimted string of edge arguments
-                         'wcc_out',       -- Output table of weakly connected components
-                         'user_id');           -- Grouping column name
+    'vertex',                       -- Vertex table
+    'node_id',                      -- Vertex id column
+    'edge',                         -- Edge table
+    'src=conn_src, dest=conn_dest', -- Comma delimited string of edge arguments
+    'wcc_out',                      -- Output table of weakly connected components
+    'user_id');                     -- Grouping column name
 SELECT * FROM wcc_out ORDER BY user_id, component_id, id;
 </pre>
 <pre class="result">
- id | component_id | user_id
-----+--------------+---------
-  0 |            0 |       1
-  1 |            0 |       1
-  2 |            0 |       1
-  3 |            0 |       1
-  5 |            0 |       1
-  6 |            0 |       1
- 10 |           10 |       2
- 11 |           10 |       2
- 12 |           10 |       2
- 13 |           10 |       2
- 14 |           14 |       2
- 15 |           14 |       2
- 16 |           14 |       2
+ node_id | component_id | user_id
+---------+--------------+---------
+       0 |            0 |       1
+       1 |            0 |       1
+       2 |            0 |       1
+       3 |            0 |       1
+       5 |            0 |       1
+       6 |            0 |       1
+      10 |           10 |       2
+      11 |           10 |       2
+      12 |           10 |       2
+      13 |           10 |       2
+      14 |           14 |       2
+      15 |           14 |       2
+      16 |           14 |       2
 (13 rows)
 </pre>
 Note that vertex 4 is not identified as a separate component
@@ -489,6 +496,86 @@ SELECT * FROM count_table;
 (2 rows)
 </pre>
 
+-# Create vertex and edge tables with multiple column ids to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex_multicol_wcc, edge_multicol_wcc;
+CREATE TABLE vertex_multicol_wcc(
+    node_id_major BIGINT,
+    node_id_minor BIGINT
+);
+CREATE TABLE edge_multicol_wcc(
+    conn_src_major BIGINT,
+    conn_dest_major BIGINT,
+    user_id_major BIGINT,
+    conn_src_minor BIGINT,
+    conn_dest_minor BIGINT,
+    user_id_minor BIGINT
+);
+INSERT INTO vertex_multicol_wcc VALUES
+(0, 0),
+(1, 1),
+(2, 2),
+(3, 3),
+(4, 4),
+(5, 5),
+(6, 6);
+INSERT INTO edge_multicol_wcc VALUES
+(0, 1, 1, 0, 1, 1),
+(0, 2, 1, 0, 2, 1),
+(0, 4, 1, 0, 4, 1),
+(1, 2, 1, 1, 2, 1),
+(1, 3, 1, 1, 3, 1),
+(2, 3, 1, 2, 3, 1),
+(2, 5, 1, 2, 5, 1),
+(2, 6, 1, 2, 6, 1),
+(3, 0, 1, 3, 0, 1),
+(4, 0, 1, 4, 0, 1),
+(5, 6, 1, 5, 6, 1),
+(6, 3, 1, 6, 3, 1),
+(0, 1, 2, 0, 1, 2),
+(0, 2, 2, 0, 2, 2),
+(0, 4, 2, 0, 4, 2),
+(1, 2, 2, 1, 2, 2),
+(1, 3, 2, 1, 3, 2),
+(2, 3, 2, 2, 3, 2),
+(3, 0, 2, 3, 0, 2),
+(4, 0, 2, 4, 0, 2),
+(5, 6, 2, 5, 6, 2),
+(6, 3, 2, 6, 3, 2);
+</pre>
+
+-# Find all the weakly connected components in the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS wcc_multicol_out, wcc_multicol_out_summary;
+SELECT madlib.weakly_connected_components(
+    'vertex_multicol_wcc',                                                          -- Vertex table
+    '[node_id_major,node_id_minor]',                                                -- Vertex id columns
+    'edge_multicol_wcc',                                                            -- Edge table
+    'src=[conn_src_major,conn_src_minor], dest=[conn_dest_major,conn_dest_minor]',  -- Comma delimited string of edge arguments
+    'wcc_multicol_out',                                                             -- Output table of weakly connected components
+    'user_id_major,user_id_minor');                                                 -- Grouping column names
+SELECT * FROM wcc_multicol_out ORDER BY user_id_major, user_id_minor, component_id, id;
+</pre>
+<pre class="result">
+  id   | component_id | user_id_major | user_id_minor
+-------+--------------+---------------+---------------
+ {0,0} |            3 |             1 |             1
+ {1,1} |            3 |             1 |             1
+ {2,2} |            3 |             1 |             1
+ {3,3} |            3 |             1 |             1
+ {4,4} |            3 |             1 |             1
+ {5,5} |            3 |             1 |             1
+ {6,6} |            3 |             1 |             1
+ {0,0} |            3 |             2 |             2
+ {1,1} |            3 |             2 |             2
+ {2,2} |            3 |             2 |             2
+ {3,3} |            3 |             2 |             2
+ {4,4} |            3 |             2 |             2
+ {5,5} |            3 |             2 |             2
+ {6,6} |            3 |             2 |             2
+(14 rows)
+</pre>
+
 @anchor notes
 @par Notes
 
@@ -548,7 +635,7 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_vertex_check(
     wcc_table           TEXT,
-    vertex_pair         TEXT,
+    vertex_pair         BIGINT[],
     pair_table          TEXT
 
 ) RETURNS VOID AS $$
@@ -559,7 +646,17 @@ m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices(
     wcc_table                    TEXT,
-    src                          TEXT,
+    src                          BIGINT,
+    reachable_vertices_table     TEXT
+
+) RETURNS VOID AS $$
+    PythonFunction(graph, wcc, graph_wcc_reachable_vertices)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_wcc_reachable_vertices(
+    wcc_table                    TEXT,
+    src                          BIGINT[],
     reachable_vertices_table     TEXT
 
 ) RETURNS VOID AS $$
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index 20a11c2a..92507449 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -381,6 +381,10 @@ def columns_exist_in_table(tbl, cols, schema_madlib="madlib"):
         True if all columns in 'cols' exist in source table else False
     """
     existing_cols = set(unquote_ident(i) for i in get_cols(tbl))
+
+    if isinstance(cols, StringTypes):
+        cols = [cols]
+
     for col in cols:
         if not col or unquote_ident(col) not in existing_cols:
             return False