You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/06/16 20:57:46 UTC
[09/34] incubator-madlib git commit: Feature: PageRank
Feature: PageRank
JIRA: MADLIB-1069
- Introduces a new module that computes the PageRank of all nodes
in a directed graph.
- Implements the original PageRank algorithm that assumes a random
surfer model (https://en.wikipedia.org/wiki/PageRank#Damping_factor)
- This version does not perform convergence test yet, so the PageRank
computation runs through all iterations. Exiting on convergence
will be handled as part of MADLIB-1082. The threshold parameter
specified will be ignored.
Closes #109
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/6b466ea6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/6b466ea6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/6b466ea6
Branch: refs/heads/latest_release
Commit: 6b466ea6d19731e8500cc89058d1a77bf778121a
Parents: d344f1f
Author: Nandish Jayaram <nj...@apache.org>
Authored: Thu Mar 16 12:02:40 2017 -0700
Committer: Nandish Jayaram <nj...@apache.org>
Committed: Thu Mar 30 17:06:17 2017 -0700
----------------------------------------------------------------------
doc/mainpage.dox.in | 3 +
.../postgres/modules/graph/graph_utils.py_in | 8 +-
src/ports/postgres/modules/graph/pagerank.py_in | 288 +++++++++++++++++++
.../postgres/modules/graph/pagerank.sql_in | 271 +++++++++++++++++
.../postgres/modules/graph/test/pagerank.sql_in | 62 ++++
5 files changed, 628 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/6b466ea6/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index 9131c10..94950e7 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -122,6 +122,9 @@ complete matrix stored as a distributed table.
@ingroup grp_datatrans
@defgroup grp_graph Graph
@{Contains graph algorithms. @}
+ @defgroup grp_pagerank PageRank
+ @ingroup grp_graph
+
@defgroup grp_sssp Single Source Shortest Path
@ingroup grp_graph
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/6b466ea6/src/ports/postgres/modules/graph/graph_utils.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in
index fb43491..2d83301 100644
--- a/src/ports/postgres/modules/graph/graph_utils.py_in
+++ b/src/ports/postgres/modules/graph/graph_utils.py_in
@@ -69,11 +69,11 @@ def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table))
_assert(vertex_id in existing_cols,
- """Graph {func_name}: The vertex column {vertex_id} is not present in
- vertex table ({vertex_table}) """.format(**locals()))
+ """Graph {func_name}: The vertex column {vertex_id} is not present in vertex table ({vertex_table}) """.
+ format(**locals()))
_assert(columns_exist_in_table(edge_table, edge_params.values()),
- """Graph {func_name}: Not all columns from {cols} present in edge
- table ({edge_table})""".format(cols=edge_params.values(), **locals()))
+ """Graph {func_name}: Not all columns from {cols} present in edge table ({edge_table})""".
+ format(cols=edge_params.values(), **locals()))
return None
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/6b466ea6/src/ports/postgres/modules/graph/pagerank.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in b/src/ports/postgres/modules/graph/pagerank.py_in
new file mode 100644
index 0000000..13cdcc5
--- /dev/null
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -0,0 +1,288 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# PageRank
+
+# Please refer to the pagerank.sql_in file for the documentation
+
+"""
+@file pagerank.py_in
+
+@namespace graph
+"""
+
+import plpy
+from utilities.control import MinWarning
+from utilities.utilities import _assert
+from utilities.utilities import extract_keyvalue_params
+from utilities.utilities import unique_string
+from utilities.control import IterationController2S
+from graph_utils import *
+
+import time
+
+m4_changequote(`<!', `!>')
+
+def validate_pagerank_args(vertex_table, vertex_id, edge_table, edge_params,
+ out_table, damping_factor, max_iter, threshold, module_name):
+ """
+ Function to validate input parameters for PageRank
+ """
+ validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
+ out_table, module_name)
+ _assert(damping_factor >= 0.0 and damping_factor <= 1.0,
+ """PageRank: Invalid damping factor value ({0}), must be between 0 and 1."""
+ .format(damping_factor))
+ _assert(threshold >= 0.0 and threshold <= 1.0,
+ """PageRank: Invalid threshold value ({0}), must be between 0 and 1."""
+ .format(threshold))
+ _assert(max_iter > 0,
+ """PageRank: Invalid max_iter value ({0}), must be a positive integer. """
+ .format(max_iter))
+
+def pagerank(schema_madlib, vertex_table, vertex_id, edge_table, edge_args,
+ out_table, damping_factor, max_iter, threshold, **kwargs):
+ """
+ Function that computes the PageRank
+
+ Args:
+ @param vertex_table
+ @param vertex_id
+ @param edge_table
+ @param source_vertex
+ @param dest_vertex
+ @param out_table
+ @param damping_factor
+ @param max_iter
+ @param threshold
+ """
+ old_msg_level = plpy.execute("""
+ SELECT setting
+ FROM pg_settings
+ WHERE name='client_min_messages'
+ """)[0]['setting']
+ plpy.execute('SET client_min_messages TO warning')
+ params_types = {'src': str, 'dest': str}
+ default_args = {'src': 'src', 'dest': 'dest'}
+ edge_params = extract_keyvalue_params(edge_args, params_types, default_args)
+
+ # populate default values for optional params if null
+ if damping_factor is None:
+ damping_factor = 0.85
+ if max_iter is None:
+ max_iter = 100
+ if threshold is None:
+ threshold = 0.00001
+ if vertex_id is None:
+ vertex_id = "id"
+ validate_pagerank_args(vertex_table, vertex_id, edge_table, edge_params,
+ out_table, damping_factor, max_iter, threshold, 'PageRank')
+ src = edge_params["src"]
+ dest = edge_params["dest"]
+
+ edge_temp_table = unique_string(desp='temp_edge')
+ distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+ <!"DISTRIBUTED BY ({0})".format(dest)!>)
+ plpy.execute("""
+ DROP TABLE IF EXISTS {edge_temp_table};
+ CREATE TEMP TABLE {edge_temp_table} AS
+ SELECT * FROM {edge_table}
+ {distribution}
+ """.format(**locals()))
+ # GPDB and HAWQ have distributed by clauses to help them with indexing.
+ # For Postgres we add the indices manually.
+ sql_index = m4_ifdef(<!__POSTGRESQL__!>,
+ <!"""CREATE INDEX ON {edge_temp_table} ({src});
+ """.format(**locals())!>,
+ <!''!>)
+ plpy.execute(sql_index)
+
+ nvertices = plpy.execute("""
+ SELECT COUNT({0}) AS cnt
+ FROM {1}
+ """.format(vertex_id, vertex_table))[0]["cnt"]
+ init_value = 1.0/nvertices
+ random_prob = (1.0-damping_factor)/nvertices
+ cur = unique_string(desp='cur')
+ message = unique_string(desp='message')
+ plpy.execute("""
+ CREATE TEMP TABLE {cur} AS
+ SELECT {vertex_id}, {init_value}::DOUBLE PRECISION AS pagerank
+ FROM {vertex_table}
+ """.format(**locals()))
+ v1 = unique_string(desp='v1')
+
+ out_cnts = unique_string(desp='out_cnts')
+ out_cnts_cnt = unique_string(desp='cnt')
+ # Compute the out-degree of every node in the graph.
+ cnts_distribution = m4_ifdef(<!__POSTGRESQL__!>, <!''!>,
+ <!"DISTRIBUTED BY ({0})".format(vertex_id)!>)
+
+ plpy.execute("""
+ DROP TABLE IF EXISTS {out_cnts};
+ CREATE TEMP TABLE {out_cnts} AS
+ SELECT {src} AS {vertex_id}, COUNT({dest}) AS {out_cnts_cnt}
+ FROM {edge_table}
+ GROUP BY {src}
+ {cnts_distribution}
+ """.format(**locals()))
+
+ for i in range(max_iter):
+ #####################################################################
+ # PageRank for node 'A' at any given iteration 'i' is given by:
+ # PR_i(A) = damping_factor(PR_i-1(B)/degree(B) + PR_i-1(C)/degree(C) + ...) + (1-damping_factor)/N
+ # where 'N' is the number of vertices in the graph,
+ # B, C are nodes that have edges to node A, and
+ # degree(node) represents the number of outgoing edges from 'node'
+ #####################################################################
+ # Essentially, the pagerank for a node is based on an aggregate of a
+ # fraction of the pagerank values of all the nodes that have incoming
+ # edges to it, along with a small random probability.
+ # More information can be found at:
+ # https://en.wikipedia.org/wiki/PageRank#Damping_factor
+
+ # The query below computes the PageRank of each node using the above formula.
+ plpy.execute("""
+ CREATE TABLE {message} AS
+ SELECT {edge_temp_table}.{dest} AS {vertex_id},
+ SUM({v1}.pagerank/{out_cnts}.{out_cnts_cnt})*{damping_factor}+{random_prob} AS pagerank
+ FROM {edge_temp_table}
+ INNER JOIN {cur} ON {edge_temp_table}.{dest}={cur}.{vertex_id}
+ INNER JOIN {out_cnts} ON {out_cnts}.{vertex_id}={edge_temp_table}.{src}
+ INNER JOIN {cur} AS {v1} ON {v1}.{vertex_id}={edge_temp_table}.{src}
+ GROUP BY {edge_temp_table}.{dest}
+ """.format(**locals()))
+ # If there are nodes that have no incoming edges, they are not captured in the message table.
+ # Insert entries for such nodes, with random_prob.
+ plpy.execute("""
+ INSERT INTO {message}
+ SELECT {vertex_id}, {random_prob}::DOUBLE PRECISION AS pagerank
+ FROM {cur}
+ WHERE {vertex_id} NOT IN (
+ SELECT {vertex_id}
+ FROM {message}
+ )
+ """.format(**locals()))
+ # Check for convergence will be done as part of grouping support for pagerank:
+ # https://issues.apache.org/jira/browse/MADLIB-1082. So, the threshold parameter
+ # is a dummy variable at the moment, the PageRank computation happens for
+ # {max_iter} number of times.
+ plpy.execute("""
+ DROP TABLE IF EXISTS {cur};
+ ALTER TABLE {message} RENAME TO {cur}
+ """.format(**locals()))
+
+ plpy.execute("ALTER TABLE {cur} RENAME TO {out_table}".format(**locals()))
+
+ ## Step 4: Cleanup
+ plpy.execute("""
+ DROP TABLE IF EXISTS {0},{1},{2},{3};
+ """.format(out_cnts, edge_temp_table, cur, message))
+ plpy.execute("SET client_min_messages TO %s" % old_msg_level)
+
+def pagerank_help(schema_madlib, message, **kwargs):
+ """
+ Help function for pagerank
+
+ Args:
+ @param schema_madlib
+ @param message: string, Help message string
+ @param kwargs
+
+ Returns:
+ String. Help/usage information
+ """
+ if message is not None and \
+ message.lower() in ("usage", "help", "?"):
+ help_string = "Get from method below"
+ help_string = get_graph_usage(schema_madlib, 'PageRank',
+ """out_table TEXT, -- Name of the output table for PageRank
+ damping_factor, DOUBLE PRECISION, -- Damping factor in random surfer model
+ -- (DEFAULT = 0.85)
+ max_iter, INTEGER, -- Maximum iteration number (DEFAULT = 100)
+ threshold DOUBLE PRECISION -- Stopping criteria (DEFAULT = 1e-5)
+""")
+ else:
+ if message is not None and \
+ message.lower() in ("example", "examples"):
+ help_string = """
+----------------------------------------------------------------------------
+ EXAMPLES
+----------------------------------------------------------------------------
+-- Create a graph, represented as vertex and edge tables.
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+ id INTEGER
+ );
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6);
+INSERT INTO edge VALUES
+(0, 1),
+(0, 2),
+(0, 4),
+(1, 2),
+(1, 3),
+(2, 3),
+(2, 5),
+(2, 6),
+(3, 0),
+(4, 0),
+(5, 6),
+(6, 3);
+
+-- Compute the PageRank:
+DROP TABLE IF EXISTS pagerank_out;
+SELECT madlib.pagerank(
+ 'vertex', -- Vertex table
+ 'id', -- Vertix id column
+ 'edge', -- Edge table
+ 'src=src, dest=dest', -- Comma delimted string of edge arguments
+ 'pagerank_out') -- Output table of PageRank
+
+-- View the PageRank of all vertices, sorted by their scores.
+SELECT * FROM pagerank_out ORDER BY pagerank desc;
+"""
+ else:
+ help_string = """
+----------------------------------------------------------------------------
+ SUMMARY
+----------------------------------------------------------------------------
+Given a directed graph, pagerank algorithm finds the PageRank score of all
+the vertices in the graph.
+--
+For an overview on usage, run:
+SELECT {schema_madlib}.pagerank('usage');
+
+For some examples, run:
+SELECT {schema_madlib}.pagerank('example')
+--
+"""
+
+ return help_string.format(schema_madlib=schema_madlib)
+# ---------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/6b466ea6/src/ports/postgres/modules/graph/pagerank.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/pagerank.sql_in b/src/ports/postgres/modules/graph/pagerank.sql_in
new file mode 100644
index 0000000..712d146
--- /dev/null
+++ b/src/ports/postgres/modules/graph/pagerank.sql_in
@@ -0,0 +1,271 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *
+ * @file graph.sql_in
+ *
+ * @brief SQL functions for graph analytics
+ * @date Nov 2016
+ *
+ * @sa Provides various graph algorithms.
+ *
+ *//* ----------------------------------------------------------------------- */
+m4_include(`SQLCommon.m4')
+
+
+/**
+@addtogroup grp_pagerank
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#pagerank">PageRank</a></li>
+<li><a href="#notes">Notes</a></li>
+<li><a href="#examples">Examples</a></li>
+<li><a href="#literature">Literature</a></li>
+</ul>
+</div>
+
+@brief Find the PageRank of all vertices in a directed graph.
+
+Given a graph, the PageRank algorithm outputs a probability distribution representing the
+likelihood that a person randomly traversing the graph will arrive at any particular vertex.
+This algorithm was originally used by Google to rank websites where the World Wide Web was
+modeled as a directed graph with the vertices representing the websites.
+
+@anchor pagerank
+@par PageRank
+<pre class="syntax">
+pagerank( vertex_table,
+ vertex_id,
+ edge_table,
+ edge_args,
+ out_table,
+ damping_factor,
+ max_iter,
+ threshold
+ )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>vertex_table</dt>
+<dd>TEXT. Name of the table containing the vertex data for the graph. Must contain the
+column specified in the 'vertex_id' parameter below.</dd>
+
+<dt>vertex_id</dt>
+<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+vertex ids. The vertex ids are of type INTEGER with no duplicates.
+They do not need to be contiguous.</dd>
+
+<dt>edge_table</dt>
+<dd>TEXT. Name of the table containing the edge data. The edge table must
+contain columns for source vertex and destination vertex.</dd>
+
+<dt>edge_args</dt>
+<dd>TEXT. A comma-delimited string containing multiple named arguments of
+the form "name=value". The following parameters are supported for
+this string argument:
+ - src (INTEGER): Name of the column containing the source vertex ids in the edge table.
+ Default column name is 'src'.
+ - dest (INTEGER): Name of the column containing the destination vertex ids in the edge table.
+ Default column name is 'dest'.</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the result of PageRank.
+It will contain a row for every vertex from 'vertex_table' with
+the following columns:
+ - vertex_id : The id of a vertex. Will use the input parameter 'vertex_id' for column naming.
+ - pagerank : The vertex's PageRank.</dd>
+
+<dt>damping_factor</dt>
+<dd>FLOAT8, default 0.85. The probability, at any step, that a user will continue following the links in a random surfer model.</dd>
+
+<dt>max_iter</dt>
+<dd>INTEGER, default: 100. The maximum number of iterations allowed.</dd>
+
+<dt>threshold</dt>
+<dd>FLOAT8, default: 1e-5. If the difference between the PageRank of every vertex of two consecutive
+iterations is smaller than 'threshold', or the iteration number is larger than 'max_iter', the
+computation stops. If you set the threshold to zero, then you will force the algorithm to run for the full number of iterations specified in 'max_iter'.</dd>
+
+</dl>
+
+@anchor notes
+@par Notes
+
+The PageRank algorithm proposed by Larry Page and Sergey Brin is used [1].
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+ id INTEGER
+ );
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6);
+INSERT INTO edge VALUES
+(0, 1),
+(0, 2),
+(0, 4),
+(1, 2),
+(1, 3),
+(2, 3),
+(2, 5),
+(2, 6),
+(3, 0),
+(4, 0),
+(5, 6),
+(6, 3);
+</pre>
+
+-# Compute the PageRank:
+<pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out;
+SELECT madlib.pagerank(
+ 'vertex', -- Vertex table
+ 'id', -- Vertix id column
+ 'edge', -- Edge table
+ 'src=src, dest=dest', -- Comma delimted string of edge arguments
+ 'pagerank_out'); -- Output table of PageRank
+SELECT * FROM pagerank_out ORDER BY pagerank desc;
+</pre>
+<pre class="result">
+ id | pagerank
+----+--------------------
+ 0 | 0.278256122055856
+ 3 | 0.201882680839737
+ 2 | 0.142878491945534
+ 6 | 0.114538731993905
+ 1 | 0.100266150276761
+ 4 | 0.100266150276761
+ 5 | 0.061911672611445
+(7 rows)
+</pre>
+
+-# Run PageRank with a damping factor of 0.5 results in different final values:
+<pre class="syntax">
+DROP TABLE IF EXISTS pagerank_out;
+SELECT madlib.pagerank(
+ 'vertex', -- Vertex table
+ 'id', -- Vertix id column
+ 'edge', -- Edge table
+ 'src=src, dest=dest', -- Comma delimted string of edge arguments
+ 'pagerank_out', -- Output table of PageRank
+ 0.5); -- Damping factor
+SELECT * FROM pagerank_out ORDER BY pagerank desc;
+</pre>
+<pre class="result">
+ id | pagerank
+----+-------------------
+ 0 | 0.221378135793372
+ 3 | 0.191574922960784
+ 6 | 0.140994575864846
+ 2 | 0.135406336658892
+ 4 | 0.108324751971412
+ 1 | 0.108324751971412
+ 5 | 0.093996524779681
+(7 rows)
+</pre>
+
+@anchor literature
+@par Literature
+
+[1] PageRank algorithm. https://en.wikipedia.org/wiki/PageRank
+*/
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ out_table TEXT,
+ damping_factor FLOAT8,
+ max_iter INTEGER,
+ threshold FLOAT8
+) RETURNS VOID AS $$
+ PythonFunction(graph, pagerank, pagerank)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ out_table TEXT,
+ damping_factor FLOAT8,
+ max_iter INTEGER
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.pagerank($1, $2, $3, $4, $5, $6, $7, 0.00001)
+$$ LANGUAGE SQL
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ out_table TEXT,
+ damping_factor FLOAT8
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.pagerank($1, $2, $3, $4, $5, $6, 100, 0.00001)
+$$ LANGUAGE SQL
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(
+ vertex_table TEXT,
+ vertex_id TEXT,
+ edge_table TEXT,
+ edge_args TEXT,
+ out_table TEXT
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.pagerank($1, $2, $3, $4, $5, 0.85, 100, 0.00001)
+$$ LANGUAGE SQL
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(
+ message VARCHAR
+) RETURNS VARCHAR AS $$
+ PythonFunction(graph, pagerank, pagerank_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank()
+RETURNS VARCHAR AS $$
+ SELECT MADLIB_SCHEMA.pagerank('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+--------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/6b466ea6/src/ports/postgres/modules/graph/test/pagerank.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/pagerank.sql_in b/src/ports/postgres/modules/graph/test/pagerank.sql_in
new file mode 100644
index 0000000..1d695e2
--- /dev/null
+++ b/src/ports/postgres/modules/graph/test/pagerank.sql_in
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------------- *//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ *//* ----------------------------------------------------------------------- */
+
+DROP TABLE IF EXISTS vertex, edge, pagerank_out;
+CREATE TABLE vertex(
+ id INTEGER
+ );
+CREATE TABLE edge(
+ src INTEGER,
+ dest INTEGER
+ );
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6);
+INSERT INTO edge VALUES
+(0, 1),
+(0, 2),
+(0, 4),
+(1, 2),
+(1, 3),
+(2, 3),
+(2, 5),
+(2, 6),
+(3, 0),
+(4, 0),
+(5, 6),
+(6, 3);
+
+SELECT madlib.pagerank(
+ 'vertex', -- Vertex table
+ 'id', -- Vertix id column
+ 'edge', -- Edge table
+ 'src=src, dest=dest', -- Edge args
+ 'pagerank_out'); -- Output table of PageRank
+
+-- View the PageRank of all vertices, sorted by their scores.
+SELECT assert(relative_error(SUM(pagerank), 1) < 0.00001,
+ 'PageRank: Scores do not sum up to 1.'
+ ) FROM pagerank_out;