You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by fm...@apache.org on 2018/04/23 21:57:51 UTC
[07/15] madlib-site git commit: jupyter notebooks for 1.14 release
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/PageRank-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/PageRank-v1.ipynb b/community-artifacts/PageRank-v1.ipynb
deleted file mode 100644
index 32b1caf..0000000
--- a/community-artifacts/PageRank-v1.ipynb
+++ /dev/null
@@ -1,774 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# PageRank\n",
- "The PageRank algorithm produces a probability distribution representing the likelihood that a person randomly traversing a graph will arrive at any particular vertex. PageRank was added in MADlib 1.11."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n",
- " \"You should import from traitlets.config instead.\", ShimWarning)\n",
- "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
- " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
- ]
- }
- ],
- "source": [
- "%load_ext sql"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/plain": [
- "u'Connected: fmcquillan@madlib'"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Greenplum 4.3.10.0\n",
- "#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib\n",
- " \n",
- "# PostgreSQL local\n",
- "%sql postgresql://fmcquillan@localhost:5432/madlib\n",
- "\n",
- "# Greenplum 4.2.3.0\n",
- "#%sql postgresql://gpdbchina@10.194.10.68:55000/madlib"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>version</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>MADlib version: 1.11-dev, git revision: rc/v1.9alpha-rc1-138-gcc5ce09, cmake configuration time: Tue Apr 11 20:47:30 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(u'MADlib version: 1.11-dev, git revision: rc/v1.9alpha-rc1-138-gcc5ce09, cmake configuration time: Tue Apr 11 20:47:30 UTC 2017, build type: Release, build system: Linux-2.6.18-238.27.1.el5.hotfix.bz516490, C compiler: gcc 4.4.0, C++ compiler: g++ 4.4.0',)]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%sql select madlib.version();\n",
- "#%sql select version();"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 1. Create vertex and edge tables"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done.\n",
- "Done.\n",
- "Done.\n",
- "7 rows affected.\n",
- "22 rows affected.\n",
- "22 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>src</th>\n",
- " <th>dest</th>\n",
- " <th>user_id</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>4</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>4</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>3</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>5</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>3</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>6</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>0</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>0</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>5</td>\n",
- " <td>6</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>5</td>\n",
- " <td>6</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>6</td>\n",
- " <td>3</td>\n",
- " <td>1</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>6</td>\n",
- " <td>3</td>\n",
- " <td>2</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(0, 1, 1),\n",
- " (0, 2, 2),\n",
- " (0, 2, 1),\n",
- " (0, 4, 2),\n",
- " (0, 4, 1),\n",
- " (0, 1, 2),\n",
- " (1, 3, 1),\n",
- " (1, 3, 2),\n",
- " (1, 2, 2),\n",
- " (1, 2, 1),\n",
- " (2, 5, 1),\n",
- " (2, 3, 1),\n",
- " (2, 3, 2),\n",
- " (2, 6, 1),\n",
- " (3, 0, 2),\n",
- " (3, 0, 1),\n",
- " (4, 0, 1),\n",
- " (4, 0, 2),\n",
- " (5, 6, 2),\n",
- " (5, 6, 1),\n",
- " (6, 3, 1),\n",
- " (6, 3, 2)]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql \n",
- "DROP TABLE IF EXISTS vertex, edge;\n",
- "\n",
- "CREATE TABLE vertex(\n",
- " id INTEGER\n",
- " );\n",
- "\n",
- "CREATE TABLE edge(\n",
- " src INTEGER,\n",
- " dest INTEGER,\n",
- " user_id INTEGER\n",
- " );\n",
- "\n",
- "INSERT INTO vertex VALUES\n",
- "(0),\n",
- "(1),\n",
- "(2),\n",
- "(3),\n",
- "(4),\n",
- "(5),\n",
- "(6);\n",
- "\n",
- "INSERT INTO edge VALUES\n",
- "(0, 1, 1),\n",
- "(0, 2, 1),\n",
- "(0, 4, 1),\n",
- "(1, 2, 1),\n",
- "(1, 3, 1),\n",
- "(2, 3, 1),\n",
- "(2, 5, 1),\n",
- "(2, 6, 1),\n",
- "(3, 0, 1),\n",
- "(4, 0, 1),\n",
- "(5, 6, 1),\n",
- "(6, 3, 1),\n",
- "(0, 1, 2),\n",
- "(0, 2, 2),\n",
- "(0, 4, 2),\n",
- "(1, 2, 2),\n",
- "(1, 3, 2),\n",
- "(2, 3, 2),\n",
- "(3, 0, 2),\n",
- "(4, 0, 2),\n",
- "(5, 6, 2),\n",
- "(6, 3, 2);\n",
- "\n",
- "SELECT * from edge ORDER BY src;"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 2. Calculate the PageRank"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done.\n",
- "1 rows affected.\n",
- "7 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>id</th>\n",
- " <th>pagerank</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>0.287537493412</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>0.210169889019</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>0.146626834541</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>0.102896143842</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>0.102896143842</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>6</td>\n",
- " <td>0.0972863776889</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>5</td>\n",
- " <td>0.0525871176569</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(0, 0.287537493411842),\n",
- " (3, 0.210169889018553),\n",
- " (2, 0.146626834540625),\n",
- " (4, 0.10289614384217),\n",
- " (1, 0.10289614384217),\n",
- " (6, 0.097286377688878),\n",
- " (5, 0.0525871176569217)]"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql\n",
- "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
- "\n",
- "SELECT madlib.pagerank(\n",
- " 'vertex', -- Vertex table\n",
- " 'id', -- Vertix id column\n",
- " 'edge', -- Edge table\n",
- " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
- " 'pagerank_out'); -- Output table of PageRank\n",
- "\n",
- "SELECT * FROM pagerank_out ORDER BY pagerank DESC;"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Look at the summary table:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>__iterations__</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>16</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(16,)]"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql\n",
- "SELECT * FROM pagerank_out_summary;"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now run PageRank with a damping factor of 0.5 which results in different final values:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done.\n",
- "1 rows affected.\n",
- "7 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>id</th>\n",
- " <th>pagerank</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>0</td>\n",
- " <td>0.225477161441</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>3</td>\n",
- " <td>0.199090328587</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>0.136261327206</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>6</td>\n",
- " <td>0.132691559968</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>4</td>\n",
- " <td>0.10900929141</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>0.10900929141</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>5</td>\n",
- " <td>0.0884610399788</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(0, 0.225477161441199),\n",
- " (3, 0.199090328586664),\n",
- " (2, 0.136261327206477),\n",
- " (6, 0.132691559968224),\n",
- " (4, 0.109009291409508),\n",
- " (1, 0.109009291409508),\n",
- " (5, 0.0884610399788161)]"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql\n",
- "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
- "SELECT madlib.pagerank(\n",
- " 'vertex', -- Vertex table\n",
- " 'id', -- Vertix id column\n",
- " 'edge', -- Edge table\n",
- " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
- " 'pagerank_out', -- Output table of PageRank\n",
- " 0.5); -- Damping factor\n",
- "SELECT * FROM pagerank_out ORDER BY pagerank DESC;"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 3. Grouping"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now compute the PageRank distribution separately for each user using the grouping feature:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Done.\n",
- "1 rows affected.\n",
- "14 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>user_id</th>\n",
- " <th>id</th>\n",
- " <th>pagerank</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>0</td>\n",
- " <td>0.278254883886</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>3</td>\n",
- " <td>0.201881146671</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>2</td>\n",
- " <td>0.142881123461</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>6</td>\n",
- " <td>0.114536378321</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>4</td>\n",
- " <td>0.100267456154</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>1</td>\n",
- " <td>0.100267456154</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>5</td>\n",
- " <td>0.0619115553529</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>0</td>\n",
- " <td>0.318546250042</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>3</td>\n",
- " <td>0.237866867733</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>2</td>\n",
- " <td>0.159148764894</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>1</td>\n",
- " <td>0.11168334438</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>4</td>\n",
- " <td>0.11168334438</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>6</td>\n",
- " <td>0.0396428571429</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>5</td>\n",
- " <td>0.0214285714286</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(1, 0, 0.278254883885528),\n",
- " (1, 3, 0.201881146670752),\n",
- " (1, 2, 0.142881123460599),\n",
- " (1, 6, 0.114536378321472),\n",
- " (1, 4, 0.10026745615438),\n",
- " (1, 1, 0.10026745615438),\n",
- " (1, 5, 0.0619115553528898),\n",
- " (2, 0, 0.318546250041731),\n",
- " (2, 3, 0.237866867733431),\n",
- " (2, 2, 0.159148764893974),\n",
- " (2, 1, 0.111683344379718),\n",
- " (2, 4, 0.111683344379718),\n",
- " (2, 6, 0.0396428571428571),\n",
- " (2, 5, 0.0214285714285714)]"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql\n",
- "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
- "\n",
- "SELECT madlib.pagerank(\n",
- " 'vertex', -- Vertex table\n",
- " 'id', -- Vertix id column\n",
- " 'edge', -- Edge table\n",
- " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
- " 'pagerank_out', -- Output table of PageRank\n",
- " NULL, -- Default damping factor (0.85)\n",
- " NULL, -- Default max iters (100)\n",
- " 0.00000001, -- Threshold\n",
- " 'user_id'); -- Grouping column name\n",
- "\n",
- "SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "2 rows affected.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "<table>\n",
- " <tr>\n",
- " <th>user_id</th>\n",
- " <th>__iterations__</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>1</td>\n",
- " <td>27</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <td>2</td>\n",
- " <td>31</td>\n",
- " </tr>\n",
- "</table>"
- ],
- "text/plain": [
- "[(1, 27), (2, 31)]"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "%%sql\n",
- "SELECT * FROM pagerank_out_summary ORDER BY user_id;"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/PageRank-v2.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/PageRank-v2.ipynb b/community-artifacts/PageRank-v2.ipynb
new file mode 100644
index 0000000..eea174f
--- /dev/null
+++ b/community-artifacts/PageRank-v2.ipynb
@@ -0,0 +1,889 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# PageRank\n",
+ "The PageRank algorithm produces a probability distribution representing the likelihood that a person randomly traversing a graph will arrive at any particular vertex. PageRank was added in MADlib 1.11.\n",
+ "\n",
+ "We also implement personalized PageRank, in which a notion of importance provides personalization to a query. This was added in 1.14"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n",
+ " \"You should import from traitlets.config instead.\", ShimWarning)\n",
+ "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
+ " warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "%load_ext sql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "u'Connected: gpadmin@madlib'"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Greenplum Database 5.4.0 on GCP (demo machine)\n",
+ "%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n",
+ " \n",
+ "# PostgreSQL local\n",
+ "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+ "\n",
+ "# Greenplum Database 4.3.10.0\n",
+ "#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>version</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%sql select madlib.version();\n",
+ "#%sql select version();"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1. Create vertex and edge tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "Done.\n",
+ "Done.\n",
+ "7 rows affected.\n",
+ "22 rows affected.\n",
+ "22 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>src</th>\n",
+ " <th>dest</th>\n",
+ " <th>user_id</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>4</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>4</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>3</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>5</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>3</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>6</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>0</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>0</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>0</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>6</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>6</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>3</td>\n",
+ " <td>1</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>3</td>\n",
+ " <td>2</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(0, 1, 1),\n",
+ " (0, 2, 2),\n",
+ " (0, 2, 1),\n",
+ " (0, 4, 2),\n",
+ " (0, 4, 1),\n",
+ " (0, 1, 2),\n",
+ " (1, 3, 1),\n",
+ " (1, 3, 2),\n",
+ " (1, 2, 2),\n",
+ " (1, 2, 1),\n",
+ " (2, 5, 1),\n",
+ " (2, 3, 1),\n",
+ " (2, 3, 2),\n",
+ " (2, 6, 1),\n",
+ " (3, 0, 2),\n",
+ " (3, 0, 1),\n",
+ " (4, 0, 1),\n",
+ " (4, 0, 2),\n",
+ " (5, 6, 2),\n",
+ " (5, 6, 1),\n",
+ " (6, 3, 1),\n",
+ " (6, 3, 2)]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql \n",
+ "DROP TABLE IF EXISTS vertex, edge;\n",
+ "\n",
+ "CREATE TABLE vertex(\n",
+ " id INTEGER\n",
+ " );\n",
+ "\n",
+ "CREATE TABLE edge(\n",
+ " src INTEGER,\n",
+ " dest INTEGER,\n",
+ " user_id INTEGER\n",
+ " );\n",
+ "\n",
+ "INSERT INTO vertex VALUES\n",
+ "(0),\n",
+ "(1),\n",
+ "(2),\n",
+ "(3),\n",
+ "(4),\n",
+ "(5),\n",
+ "(6);\n",
+ "\n",
+ "INSERT INTO edge VALUES\n",
+ "(0, 1, 1),\n",
+ "(0, 2, 1),\n",
+ "(0, 4, 1),\n",
+ "(1, 2, 1),\n",
+ "(1, 3, 1),\n",
+ "(2, 3, 1),\n",
+ "(2, 5, 1),\n",
+ "(2, 6, 1),\n",
+ "(3, 0, 1),\n",
+ "(4, 0, 1),\n",
+ "(5, 6, 1),\n",
+ "(6, 3, 1),\n",
+ "(0, 1, 2),\n",
+ "(0, 2, 2),\n",
+ "(0, 4, 2),\n",
+ "(1, 2, 2),\n",
+ "(1, 3, 2),\n",
+ "(2, 3, 2),\n",
+ "(3, 0, 2),\n",
+ "(4, 0, 2),\n",
+ "(5, 6, 2),\n",
+ "(6, 3, 2);\n",
+ "\n",
+ "SELECT * from edge ORDER BY src;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 2. Calculate PageRank"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "1 rows affected.\n",
+ "7 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>id</th>\n",
+ " <th>pagerank</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>0.287518161212</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>0.210171199451</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>0.146637377532</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>0.102910437211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>0.102910437211</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>0.0972746644343</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>0.0525777229482</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(0, 0.287518161212111),\n",
+ " (3, 0.210171199451415),\n",
+ " (2, 0.146637377532288),\n",
+ " (4, 0.102910437211324),\n",
+ " (1, 0.102910437211324),\n",
+ " (6, 0.0972746644343418),\n",
+ " (5, 0.0525777229481976)]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
+ "\n",
+ "SELECT madlib.pagerank(\n",
+ " 'vertex', -- Vertex table\n",
+ " 'id', -- Vertix id column\n",
+ " 'edge', -- Edge table\n",
+ " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
+ " 'pagerank_out'); -- Output table of PageRank\n",
+ "\n",
+ "SELECT * FROM pagerank_out ORDER BY pagerank DESC;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Look at the summary table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>__iterations__</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>12</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(12,)]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM pagerank_out_summary;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now run PageRank with a damping factor of 0.5 which results in different final values:"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 3. Calculate PageRank with optional params"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "1 rows affected.\n",
+ "7 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>id</th>\n",
+ " <th>pagerank</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>0.225477430556</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>0.199105076058</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>0.136259748402</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>0.132687846189</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>0.109006420855</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>0.109006420855</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>0.088457057085</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(0, 0.225477430555642),\n",
+ " (3, 0.199105076058278),\n",
+ " (2, 0.136259748401728),\n",
+ " (6, 0.132687846188875),\n",
+ " (4, 0.109006420855422),\n",
+ " (1, 0.109006420855422),\n",
+ " (5, 0.0884570570850217)]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
+ "SELECT madlib.pagerank(\n",
+ " 'vertex', -- Vertex table\n",
+ " 'id', -- Vertix id column\n",
+ " 'edge', -- Edge table\n",
+ " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
+ " 'pagerank_out', -- Output table of PageRank\n",
+ " 0.5); -- Damping factor\n",
+ "SELECT * FROM pagerank_out ORDER BY pagerank DESC;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 4. Grouping\n",
+ "Now compute the PageRank distribution separately for each user using the grouping feature:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "1 rows affected.\n",
+ "14 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>user_id</th>\n",
+ " <th>id</th>\n",
+ " <th>pagerank</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>0</td>\n",
+ " <td>0.278254883886</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>3</td>\n",
+ " <td>0.201881146671</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>2</td>\n",
+ " <td>0.142881123461</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>6</td>\n",
+ " <td>0.114536378321</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>4</td>\n",
+ " <td>0.100267456154</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>1</td>\n",
+ " <td>0.100267456154</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>5</td>\n",
+ " <td>0.0619115553529</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>0</td>\n",
+ " <td>0.318546250042</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>3</td>\n",
+ " <td>0.237866867733</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>2</td>\n",
+ " <td>0.159148764894</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>1</td>\n",
+ " <td>0.11168334438</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>4</td>\n",
+ " <td>0.11168334438</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>6</td>\n",
+ " <td>0.0396428571429</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>5</td>\n",
+ " <td>0.0214285714286</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(1, 0, 0.278254883885528),\n",
+ " (1, 3, 0.201881146670752),\n",
+ " (1, 2, 0.142881123460599),\n",
+ " (1, 6, 0.114536378321472),\n",
+ " (1, 4, 0.10026745615438),\n",
+ " (1, 1, 0.10026745615438),\n",
+ " (1, 5, 0.0619115553528898),\n",
+ " (2, 0, 0.318546250041731),\n",
+ " (2, 3, 0.237866867733431),\n",
+ " (2, 2, 0.159148764893974),\n",
+ " (2, 1, 0.111683344379718),\n",
+ " (2, 4, 0.111683344379718),\n",
+ " (2, 6, 0.0396428571428571),\n",
+ " (2, 5, 0.0214285714285714)]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
+ "\n",
+ "SELECT madlib.pagerank(\n",
+ " 'vertex', -- Vertex table\n",
+ " 'id', -- Vertix id column\n",
+ " 'edge', -- Edge table\n",
+ " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
+ " 'pagerank_out', -- Output table of PageRank\n",
+ " NULL, -- Default damping factor (0.85)\n",
+ " NULL, -- Default max iters (100)\n",
+ " 0.00000001, -- Threshold\n",
+ " 'user_id'); -- Grouping column name\n",
+ "\n",
+ "SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>user_id</th>\n",
+ " <th>__iterations__</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>27</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>31</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(1, 27), (2, 31)]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM pagerank_out_summary ORDER BY user_id;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 5. Personalized PageRank\n",
+ "Here we specify {2,4} as the personalization vertices. This parameter could be specified as ARRAY[2,4] as well:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Done.\n",
+ "1 rows affected.\n",
+ "7 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>id</th>\n",
+ " <th>pagerank</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>0</td>\n",
+ " <td>0.282616480981</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>2</td>\n",
+ " <td>0.189069710497</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>3</td>\n",
+ " <td>0.177501646133</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>4</td>\n",
+ " <td>0.15505560795</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>1</td>\n",
+ " <td>0.0800556079496</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>6</td>\n",
+ " <td>0.0743076577868</td>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>5</td>\n",
+ " <td>0.0401701653568</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(0, 0.282616480980651),\n",
+ " (2, 0.18906971049691),\n",
+ " (3, 0.177501646133459),\n",
+ " (4, 0.155055607949638),\n",
+ " (1, 0.0800556079496381),\n",
+ " (6, 0.0743076577867786),\n",
+ " (5, 0.0401701653567887)]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;\n",
+ "SELECT madlib.pagerank(\n",
+ " 'vertex', -- Vertex table\n",
+ " 'id', -- Vertix id column\n",
+ " 'edge', -- Edge table\n",
+ " 'src=src, dest=dest', -- Comma delimted string of edge arguments\n",
+ " 'pagerank_out', -- Output table of PageRank\n",
+ " NULL, -- Default damping factor (0.85)\n",
+ " NULL, -- Default max iters (100)\n",
+ " NULL, -- Default Threshold\n",
+ " NULL, -- No Grouping\n",
+ " '{2,4}'); -- Personalization vertices\n",
+ "SELECT * FROM pagerank_out ORDER BY pagerank DESC;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1 rows affected.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "<table>\n",
+ " <tr>\n",
+ " <th>__iterations__</th>\n",
+ " </tr>\n",
+ " <tr>\n",
+ " <td>37</td>\n",
+ " </tr>\n",
+ "</table>"
+ ],
+ "text/plain": [
+ "[(37,)]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "%%sql\n",
+ "SELECT * FROM pagerank_out_summary;"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 2",
+ "language": "python",
+ "name": "python2"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 2
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython2",
+ "version": "2.7.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}