You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2018/04/17 21:06:46 UTC

[6/6] madlib git commit: Pagerank: Update docs for PPR

Pagerank: Update docs for PPR

Closes #264


Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/47007aa2
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/47007aa2
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/47007aa2

Branch: refs/heads/master
Commit: 47007aa29a24bccef4bbc50b99978e7cb20d035f
Parents: 0f9f12f
Author: Frank McQuillan <fm...@pivotal.io>
Authored: Tue Apr 17 14:04:20 2018 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Tue Apr 17 14:06:19 2018 -0700

----------------------------------------------------------------------
 .../postgres/modules/graph/pagerank.sql_in      | 29 ++++++++++++++------
 1 file changed, 21 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/madlib/blob/47007aa2/src/ports/postgres/modules/graph/pagerank.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/pagerank.sql_in b/src/ports/postgres/modules/graph/pagerank.sql_in
index a898b30..30a0b50 100644
--- a/src/ports/postgres/modules/graph/pagerank.sql_in
+++ b/src/ports/postgres/modules/graph/pagerank.sql_in
@@ -48,6 +48,11 @@ This algorithm was originally used by Google to rank websites where the World Wi
 modeled as a directed graph with the vertices representing the websites.  The PageRank
 algorithm initially proposed by Larry Page and Sergey Brin is implemented here [1].
 
+We also implement personalized PageRank, in which a notion of importance 
+provides personalization to a query.  
+For example, importance scores can be biased according
+to a specified set of vertices in the graph that are of interest or special in some way [2].
+
 @anchor pagerank
 @par PageRank
 <pre class="syntax">
@@ -121,10 +126,10 @@ distribution per group. When this value is NULL, no grouping is used and
 a single model is generated for all data.
 @note Expressions are not currently supported for 'grouping_cols'.</dd>
 
-<dt> personalization_vertices (optional) </dt>
-<dd>ARRAY OF INTEGER, default: NULL. A comma seperated list of vertices or nodes
-for personalized page rank. When this parameter is provided, Personalized Page Rank
-will run and in the absence of this parameter, regular PageRank will run.
+<dt> personalization_vertices (optional)</dt>
+<dd>INTEGER[], default: NULL. A comma-separated list of vertices or nodes
+for personalized PageRank. When this parameter is provided, personalized PageRank
+will run.  In the absence of this parameter, regular PageRank will run.
 </dl>
 
 @anchor examples
@@ -278,7 +283,9 @@ SELECT * FROM pagerank_out_summary ORDER BY user_id;
 (2 rows)
 </pre>
 
--# Example of Personalized Page Rank with Nodes {2,4}. personalization_vertices can be passed in ARRAY[2,4] format as well.
+-# Personalized PageRank. Here we specify {2,4}
+as the personalization vertices. This parameter
+could be specified as ARRAY[2,4] as well.
 <pre class="syntax">
 DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
 SELECT madlib.pagerank(
@@ -291,7 +298,7 @@ SELECT madlib.pagerank(
                         NULL,                -- Default max iters (100)
                         NULL,                -- Default Threshold
                         NULL,                -- No Grouping
-                       '{2,4}');             -- Personlized Nodes
+                       '{2,4}');             -- Personalization vertices
 SELECT * FROM pagerank_out ORDER BY pagerank DESC;
 </pre>
 <pre class="result">
@@ -305,7 +312,6 @@ SELECT * FROM pagerank_out ORDER BY pagerank DESC;
   6 |  0.148615315574136
   5 | 0.0803403307142321
 (7 rows)
-
 </pre>
 <pre class="syntax">
 SELECT * FROM pagerank_out_summary;
@@ -320,7 +326,14 @@ SELECT * FROM pagerank_out_summary;
 @anchor literature
 @par Literature
 
-[1] PageRank algorithm. https://en.wikipedia.org/wiki/PageRank
+[1] Brin, S. and Page, L. (1998), "The anatomy of a large-scale hypertextual Web search engine", 
+Computer Networks and ISDN Systems. 30: 107–117, 
+http://infolab.stanford.edu/pub/papers/google.pdf
+
+[2] Jeh, Glen and Widom, Jennifer. "Scaling Personalized Web Search",
+Proceedings of the 12th international conference on World Wide Web, Pages 271-279,
+Budapest, Hungary, May 20-24, 2003,
+http://ilpubs.stanford.edu:8090/530/1/2002-12.pdf
 */
 
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pagerank(