You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by fm...@apache.org on 2018/04/23 21:57:52 UTC

[08/15] madlib-site git commit: jupyter notebooks for 1.14 release

http://git-wip-us.apache.org/repos/asf/madlib-site/blob/3f849b9e/community-artifacts/Minibatch-preprocessor-v1.ipynb
----------------------------------------------------------------------
diff --git a/community-artifacts/Minibatch-preprocessor-v1.ipynb b/community-artifacts/Minibatch-preprocessor-v1.ipynb
new file mode 100644
index 0000000..fe03a27
--- /dev/null
+++ b/community-artifacts/Minibatch-preprocessor-v1.ipynb
@@ -0,0 +1,1330 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Mini-batch preprocessor\n",
+    "\n",
+    "The mini-batch preprocessor is a utility that prepares input data for use by models that support mini-batch as an optimization option. (This is currently only the case for Neural Networks.) It is effectively a packing operation that builds arrays of dependent and independent variables from the source data table.\n",
+    "\n",
+    "The mini-batch preprocessor was added in MADlib 1.14."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/config.py:13: ShimWarning: The `IPython.config` package has been deprecated. You should import from traitlets.config instead.\n",
+      "  \"You should import from traitlets.config instead.\", ShimWarning)\n",
+      "/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/utils/traitlets.py:5: UserWarning: IPython.utils.traitlets has moved to a top-level traitlets package.\n",
+      "  warn(\"IPython.utils.traitlets has moved to a top-level traitlets package.\")\n"
+     ]
+    }
+   ],
+   "source": [
+    "%load_ext sql"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "u'Connected: gpadmin@madlib'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Greenplum Database 5.4.0 on GCP (demo machine)\n",
+    "%sql postgresql://gpadmin@35.184.253.255:5432/madlib\n",
+    "        \n",
+    "# PostgreSQL local\n",
+    "#%sql postgresql://fmcquillan@localhost:5432/madlib\n",
+    "\n",
+    "# Greenplum Database 4.3.10.0\n",
+    "#%sql postgresql://gpdbchina@10.194.10.68:61000/madlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>version</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'MADlib version: 1.14-dev, git revision: rc/1.13-rc1-66-g4cced1b, cmake configuration time: Mon Apr 23 16:26:17 UTC 2018, build type: release, build system: Linux-2.6.32-696.20.1.el6.x86_64, C compiler: gcc 4.4.7, C++ compiler: g++ 4.4.7',)]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%sql select madlib.version();\n",
+    "#%sql select version();"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Load data\n",
+    "This example uses a small subset of the well-known iris dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "Done.\n",
+      "52 rows affected.\n",
+      "52 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>id</th>\n",
+       "        <th>attributes</th>\n",
+       "        <th>class_text</th>\n",
+       "        <th>class</th>\n",
+       "        <th>state</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[Decimal('5.0'), Decimal('3.2'), Decimal('1.2'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[Decimal('5.5'), Decimal('3.5'), Decimal('1.3'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>[Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>[Decimal('4.4'), Decimal('3.0'), Decimal('1.3'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>[Decimal('5.1'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>6</td>\n",
+       "        <td>[Decimal('5.0'), Decimal('3.5'), Decimal('1.3'), Decimal('0.3')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>7</td>\n",
+       "        <td>[Decimal('4.5'), Decimal('2.3'), Decimal('1.3'), Decimal('0.3')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>8</td>\n",
+       "        <td>[Decimal('4.4'), Decimal('3.2'), Decimal('1.3'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>9</td>\n",
+       "        <td>[Decimal('5.0'), Decimal('3.5'), Decimal('1.6'), Decimal('0.6')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>10</td>\n",
+       "        <td>[Decimal('5.1'), Decimal('3.8'), Decimal('1.9'), Decimal('0.4')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>11</td>\n",
+       "        <td>[Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.3')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>12</td>\n",
+       "        <td>[Decimal('5.1'), Decimal('3.8'), Decimal('1.6'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>13</td>\n",
+       "        <td>[Decimal('5.7'), Decimal('2.8'), Decimal('4.5'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>14</td>\n",
+       "        <td>[Decimal('6.3'), Decimal('3.3'), Decimal('4.7'), Decimal('1.6')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>15</td>\n",
+       "        <td>[Decimal('4.9'), Decimal('2.4'), Decimal('3.3'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>16</td>\n",
+       "        <td>[Decimal('6.6'), Decimal('2.9'), Decimal('4.6'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>17</td>\n",
+       "        <td>[Decimal('5.2'), Decimal('2.7'), Decimal('3.9'), Decimal('1.4')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>18</td>\n",
+       "        <td>[Decimal('5.0'), Decimal('2.0'), Decimal('3.5'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>19</td>\n",
+       "        <td>[Decimal('5.9'), Decimal('3.0'), Decimal('4.2'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>20</td>\n",
+       "        <td>[Decimal('6.0'), Decimal('2.2'), Decimal('4.0'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>21</td>\n",
+       "        <td>[Decimal('6.1'), Decimal('2.9'), Decimal('4.7'), Decimal('1.4')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>22</td>\n",
+       "        <td>[Decimal('5.6'), Decimal('2.9'), Decimal('3.6'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>23</td>\n",
+       "        <td>[Decimal('6.7'), Decimal('3.1'), Decimal('4.4'), Decimal('1.4')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>24</td>\n",
+       "        <td>[Decimal('5.6'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>25</td>\n",
+       "        <td>[Decimal('5.8'), Decimal('2.7'), Decimal('4.1'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>26</td>\n",
+       "        <td>[Decimal('6.2'), Decimal('2.2'), Decimal('4.5'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>27</td>\n",
+       "        <td>[Decimal('5.6'), Decimal('2.5'), Decimal('3.9'), Decimal('1.1')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>28</td>\n",
+       "        <td>[Decimal('5.0'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>29</td>\n",
+       "        <td>[Decimal('4.4'), Decimal('2.9'), Decimal('1.4'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>30</td>\n",
+       "        <td>[Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>31</td>\n",
+       "        <td>[Decimal('5.4'), Decimal('3.7'), Decimal('1.5'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>32</td>\n",
+       "        <td>[Decimal('4.8'), Decimal('3.4'), Decimal('1.6'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>33</td>\n",
+       "        <td>[Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.1')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>34</td>\n",
+       "        <td>[Decimal('4.3'), Decimal('3.0'), Decimal('1.1'), Decimal('0.1')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>35</td>\n",
+       "        <td>[Decimal('5.8'), Decimal('4.0'), Decimal('1.2'), Decimal('0.2')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>36</td>\n",
+       "        <td>[Decimal('5.7'), Decimal('4.4'), Decimal('1.5'), Decimal('0.4')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>37</td>\n",
+       "        <td>[Decimal('5.4'), Decimal('3.9'), Decimal('1.3'), Decimal('0.4')]</td>\n",
+       "        <td>Iris_setosa</td>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>38</td>\n",
+       "        <td>[Decimal('6.0'), Decimal('2.9'), Decimal('4.5'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>39</td>\n",
+       "        <td>[Decimal('5.7'), Decimal('2.6'), Decimal('3.5'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>40</td>\n",
+       "        <td>[Decimal('5.5'), Decimal('2.4'), Decimal('3.8'), Decimal('1.1')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>41</td>\n",
+       "        <td>[Decimal('5.5'), Decimal('2.4'), Decimal('3.7'), Decimal('1.0')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>42</td>\n",
+       "        <td>[Decimal('5.8'), Decimal('2.7'), Decimal('3.9'), Decimal('1.2')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>43</td>\n",
+       "        <td>[Decimal('6.0'), Decimal('2.7'), Decimal('5.1'), Decimal('1.6')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>44</td>\n",
+       "        <td>[Decimal('5.4'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>45</td>\n",
+       "        <td>[Decimal('6.0'), Decimal('3.4'), Decimal('4.5'), Decimal('1.6')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>46</td>\n",
+       "        <td>[Decimal('6.7'), Decimal('3.1'), Decimal('4.7'), Decimal('1.5')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>47</td>\n",
+       "        <td>[Decimal('6.3'), Decimal('2.3'), Decimal('4.4'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>48</td>\n",
+       "        <td>[Decimal('5.6'), Decimal('3.0'), Decimal('4.1'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>49</td>\n",
+       "        <td>[Decimal('5.5'), Decimal('2.5'), Decimal('4.0'), Decimal('1.3')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>50</td>\n",
+       "        <td>[Decimal('5.5'), Decimal('2.6'), Decimal('4.4'), Decimal('1.2')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>51</td>\n",
+       "        <td>[Decimal('6.1'), Decimal('3.0'), Decimal('4.6'), Decimal('1.4')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>52</td>\n",
+       "        <td>[Decimal('5.8'), Decimal('2.6'), Decimal('4.0'), Decimal('1.2')]</td>\n",
+       "        <td>Iris_versicolor</td>\n",
+       "        <td>2</td>\n",
+       "        <td>Tennessee</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(1, [Decimal('5.0'), Decimal('3.2'), Decimal('1.2'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (2, [Decimal('5.5'), Decimal('3.5'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (3, [Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (4, [Decimal('4.4'), Decimal('3.0'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (5, [Decimal('5.1'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (6, [Decimal('5.0'), Decimal('3.5'), Decimal('1.3'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (7, [Decimal('4.5'), Decimal('2.3'), Decimal('1.3'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (8, [Decimal('4.4'), Decimal('3.2'), Decimal('1.3'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (9, [Decimal('5.0'), Decimal('3.5'), Decimal('1.6'), Decimal('0.6')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (10, [Decimal('5.1'), Decimal('3.8'), Decimal('1.9'), Decimal('0.4')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (11, [Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.3')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (12, [Decimal('5.1'), Decimal('3.8'), Decimal('1.6'), Decimal('0.2')], u'Iris_setosa', 1, u'Alaska'),\n",
+       " (13, [Decimal('5.7'), Decimal('2.8'), Decimal('4.5'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (14, [Decimal('6.3'), Decimal('3.3'), Decimal('4.7'), Decimal('1.6')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (15, [Decimal('4.9'), Decimal('2.4'), Decimal('3.3'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (16, [Decimal('6.6'), Decimal('2.9'), Decimal('4.6'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (17, [Decimal('5.2'), Decimal('2.7'), Decimal('3.9'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (18, [Decimal('5.0'), Decimal('2.0'), Decimal('3.5'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (19, [Decimal('5.9'), Decimal('3.0'), Decimal('4.2'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (20, [Decimal('6.0'), Decimal('2.2'), Decimal('4.0'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (21, [Decimal('6.1'), Decimal('2.9'), Decimal('4.7'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (22, [Decimal('5.6'), Decimal('2.9'), Decimal('3.6'), Decimal('1.3')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (23, [Decimal('6.7'), Decimal('3.1'), Decimal('4.4'), Decimal('1.4')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (24, [Decimal('5.6'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (25, [Decimal('5.8'), Decimal('2.7'), Decimal('4.1'), Decimal('1.0')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (26, [Decimal('6.2'), Decimal('2.2'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (27, [Decimal('5.6'), Decimal('2.5'), Decimal('3.9'), Decimal('1.1')], u'Iris_versicolor', 2, u'Alaska'),\n",
+       " (28, [Decimal('5.0'), Decimal('3.4'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (29, [Decimal('4.4'), Decimal('2.9'), Decimal('1.4'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (30, [Decimal('4.9'), Decimal('3.1'), Decimal('1.5'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (31, [Decimal('5.4'), Decimal('3.7'), Decimal('1.5'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (32, [Decimal('4.8'), Decimal('3.4'), Decimal('1.6'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (33, [Decimal('4.8'), Decimal('3.0'), Decimal('1.4'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (34, [Decimal('4.3'), Decimal('3.0'), Decimal('1.1'), Decimal('0.1')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (35, [Decimal('5.8'), Decimal('4.0'), Decimal('1.2'), Decimal('0.2')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (36, [Decimal('5.7'), Decimal('4.4'), Decimal('1.5'), Decimal('0.4')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (37, [Decimal('5.4'), Decimal('3.9'), Decimal('1.3'), Decimal('0.4')], u'Iris_setosa', 1, u'Tennessee'),\n",
+       " (38, [Decimal('6.0'), Decimal('2.9'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (39, [Decimal('5.7'), Decimal('2.6'), Decimal('3.5'), Decimal('1.0')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (40, [Decimal('5.5'), Decimal('2.4'), Decimal('3.8'), Decimal('1.1')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (41, [Decimal('5.5'), Decimal('2.4'), Decimal('3.7'), Decimal('1.0')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (42, [Decimal('5.8'), Decimal('2.7'), Decimal('3.9'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (43, [Decimal('6.0'), Decimal('2.7'), Decimal('5.1'), Decimal('1.6')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (44, [Decimal('5.4'), Decimal('3.0'), Decimal('4.5'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (45, [Decimal('6.0'), Decimal('3.4'), Decimal('4.5'), Decimal('1.6')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (46, [Decimal('6.7'), Decimal('3.1'), Decimal('4.7'), Decimal('1.5')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (47, [Decimal('6.3'), Decimal('2.3'), Decimal('4.4'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (48, [Decimal('5.6'), Decimal('3.0'), Decimal('4.1'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (49, [Decimal('5.5'), Decimal('2.5'), Decimal('4.0'), Decimal('1.3')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (50, [Decimal('5.5'), Decimal('2.6'), Decimal('4.4'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (51, [Decimal('6.1'), Decimal('3.0'), Decimal('4.6'), Decimal('1.4')], u'Iris_versicolor', 2, u'Tennessee'),\n",
+       " (52, [Decimal('5.8'), Decimal('2.6'), Decimal('4.0'), Decimal('1.2')], u'Iris_versicolor', 2, u'Tennessee')]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS iris_data;\n",
+    "\n",
+    "CREATE TABLE iris_data(\n",
+    "    id serial,\n",
+    "    attributes numeric[],\n",
+    "    class_text varchar,\n",
+    "    class integer,\n",
+    "    state varchar\n",
+    ");\n",
+    "\n",
+    "INSERT INTO iris_data(id, attributes, class_text, class, state) VALUES\n",
+    "(1,ARRAY[5.0,3.2,1.2,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(2,ARRAY[5.5,3.5,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(3,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Alaska'),\n",
+    "(4,ARRAY[4.4,3.0,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(5,ARRAY[5.1,3.4,1.5,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(6,ARRAY[5.0,3.5,1.3,0.3],'Iris_setosa',1,'Alaska'),\n",
+    "(7,ARRAY[4.5,2.3,1.3,0.3],'Iris_setosa',1,'Alaska'),\n",
+    "(8,ARRAY[4.4,3.2,1.3,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(9,ARRAY[5.0,3.5,1.6,0.6],'Iris_setosa',1,'Alaska'),\n",
+    "(10,ARRAY[5.1,3.8,1.9,0.4],'Iris_setosa',1,'Alaska'),\n",
+    "(11,ARRAY[4.8,3.0,1.4,0.3],'Iris_setosa',1,'Alaska'),\n",
+    "(12,ARRAY[5.1,3.8,1.6,0.2],'Iris_setosa',1,'Alaska'),\n",
+    "(13,ARRAY[5.7,2.8,4.5,1.3],'Iris_versicolor',2,'Alaska'),\n",
+    "(14,ARRAY[6.3,3.3,4.7,1.6],'Iris_versicolor',2,'Alaska'),\n",
+    "(15,ARRAY[4.9,2.4,3.3,1.0],'Iris_versicolor',2,'Alaska'),\n",
+    "(16,ARRAY[6.6,2.9,4.6,1.3],'Iris_versicolor',2,'Alaska'),\n",
+    "(17,ARRAY[5.2,2.7,3.9,1.4],'Iris_versicolor',2,'Alaska'),\n",
+    "(18,ARRAY[5.0,2.0,3.5,1.0],'Iris_versicolor',2,'Alaska'),\n",
+    "(19,ARRAY[5.9,3.0,4.2,1.5],'Iris_versicolor',2,'Alaska'),\n",
+    "(20,ARRAY[6.0,2.2,4.0,1.0],'Iris_versicolor',2,'Alaska'),\n",
+    "(21,ARRAY[6.1,2.9,4.7,1.4],'Iris_versicolor',2,'Alaska'),\n",
+    "(22,ARRAY[5.6,2.9,3.6,1.3],'Iris_versicolor',2,'Alaska'),\n",
+    "(23,ARRAY[6.7,3.1,4.4,1.4],'Iris_versicolor',2,'Alaska'),\n",
+    "(24,ARRAY[5.6,3.0,4.5,1.5],'Iris_versicolor',2,'Alaska'),\n",
+    "(25,ARRAY[5.8,2.7,4.1,1.0],'Iris_versicolor',2,'Alaska'),\n",
+    "(26,ARRAY[6.2,2.2,4.5,1.5],'Iris_versicolor',2,'Alaska'),\n",
+    "(27,ARRAY[5.6,2.5,3.9,1.1],'Iris_versicolor',2,'Alaska'),\n",
+    "(28,ARRAY[5.0,3.4,1.5,0.2],'Iris_setosa',1,'Tennessee'),\n",
+    "(29,ARRAY[4.4,2.9,1.4,0.2],'Iris_setosa',1,'Tennessee'),\n",
+    "(30,ARRAY[4.9,3.1,1.5,0.1],'Iris_setosa',1,'Tennessee'),\n",
+    "(31,ARRAY[5.4,3.7,1.5,0.2],'Iris_setosa',1,'Tennessee'),\n",
+    "(32,ARRAY[4.8,3.4,1.6,0.2],'Iris_setosa',1,'Tennessee'),\n",
+    "(33,ARRAY[4.8,3.0,1.4,0.1],'Iris_setosa',1,'Tennessee'),\n",
+    "(34,ARRAY[4.3,3.0,1.1,0.1],'Iris_setosa',1,'Tennessee'),\n",
+    "(35,ARRAY[5.8,4.0,1.2,0.2],'Iris_setosa',1,'Tennessee'),\n",
+    "(36,ARRAY[5.7,4.4,1.5,0.4],'Iris_setosa',1,'Tennessee'),\n",
+    "(37,ARRAY[5.4,3.9,1.3,0.4],'Iris_setosa',1,'Tennessee'),\n",
+    "(38,ARRAY[6.0,2.9,4.5,1.5],'Iris_versicolor',2,'Tennessee'),\n",
+    "(39,ARRAY[5.7,2.6,3.5,1.0],'Iris_versicolor',2,'Tennessee'),\n",
+    "(40,ARRAY[5.5,2.4,3.8,1.1],'Iris_versicolor',2,'Tennessee'),\n",
+    "(41,ARRAY[5.5,2.4,3.7,1.0],'Iris_versicolor',2,'Tennessee'),\n",
+    "(42,ARRAY[5.8,2.7,3.9,1.2],'Iris_versicolor',2,'Tennessee'),\n",
+    "(43,ARRAY[6.0,2.7,5.1,1.6],'Iris_versicolor',2,'Tennessee'),\n",
+    "(44,ARRAY[5.4,3.0,4.5,1.5],'Iris_versicolor',2,'Tennessee'),\n",
+    "(45,ARRAY[6.0,3.4,4.5,1.6],'Iris_versicolor',2,'Tennessee'),\n",
+    "(46,ARRAY[6.7,3.1,4.7,1.5],'Iris_versicolor',2,'Tennessee'),\n",
+    "(47,ARRAY[6.3,2.3,4.4,1.3],'Iris_versicolor',2,'Tennessee'),\n",
+    "(48,ARRAY[5.6,3.0,4.1,1.3],'Iris_versicolor',2,'Tennessee'),\n",
+    "(49,ARRAY[5.5,2.5,4.0,1.3],'Iris_versicolor',2,'Tennessee'),\n",
+    "(50,ARRAY[5.5,2.6,4.4,1.2],'Iris_versicolor',2,'Tennessee'),\n",
+    "(51,ARRAY[6.1,3.0,4.6,1.4],'Iris_versicolor',2,'Tennessee'),\n",
+    "(52,ARRAY[5.8,2.6,4.0,1.2],'Iris_versicolor',2,'Tennessee');\n",
+    "\n",
+    "SELECT * FROM iris_data ORDER BY id;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 2.  Run preprocessor \n",
+    "\n",
+    "Run the preprocessor to generate the packed output table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>__id__</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.810136328
 03286, -1.21615408353289, -0.821161468643789], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [-0.934561495026824, -1
 .20032494247982, 0.193879636505243, 0.269061417385597]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [1.90445005685255, -0.19683785230
 8928, 1.11040155453003, 0.81417286040029], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [-0.0995580974152422
 , 1.4087414919645, -1.07515071152907, -1.18456909732025]]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0L, [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], [[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [0.568444620674023, -0.598232688377286,
  0.616889752516682, 0.632469046062059], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [2.07145073637487, 0
 .20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597]]),\n",
+       " (1L, [[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.76956489263841, 0.00385956572525086, 
 -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [-1.76956489263841, -0.
 196837852308928, -1.14565239753098, -1.18456909732025], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025]])]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
+    "\n",
+    "SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table\n",
+    "                                     'iris_data_packed',  -- Output table\n",
+    "                                     'class_text',        -- Dependent variable\n",
+    "                                     'attributes'        -- Independent variables\n",
+    "                                     );\n",
+    "\n",
+    "SELECT * FROM iris_data_packed ORDER BY __id__;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For small datasets like the one in this example, buffer size is mainly determined by the number of segments in the database. For a Greenplum database with 2 segments, there will be 2 rows with a buffer size of 26. For PostgreSQL, there would be only one row with a buffer size of 52 since it is a single-node database. For larger datasets, other factors besides the number of segments go into computing the buffer size.\n",
+    "\n",
+    "Also, note in the packed output table above that the dependent variable has been one-hot encoded since it is categorical.\n",
+    "\n",
+    "Review the output summary table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>source_table</th>\n",
+       "        <th>output_table</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "        <th>dependent_vartype</th>\n",
+       "        <th>buffer_size</th>\n",
+       "        <th>class_values</th>\n",
+       "        <th>num_rows_processed</th>\n",
+       "        <th>num_missing_rows_skipped</th>\n",
+       "        <th>grouping_cols</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>iris_data</td>\n",
+       "        <td>iris_data_packed</td>\n",
+       "        <td>class_text</td>\n",
+       "        <td>attributes</td>\n",
+       "        <td>character varying</td>\n",
+       "        <td>26</td>\n",
+       "        <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
+       "        <td>52</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 26, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, None)]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_summary;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Review the output standardization table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>mean</th>\n",
+       "        <th>std</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>[5.45961538462, 2.99807692308, 3.025, 0.851923076923]</td>\n",
+       "        <td>[0.598799958695, 0.498262513686, 1.41840579525, 0.550346179381]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[([5.45961538462, 2.99807692308, 3.025, 0.851923076923], [0.598799958695, 0.498262513686, 1.41840579525, 0.550346179381])]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_standardization;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 3.  Change buffer size \n",
+    "\n",
+    "Generally the default buffer size will work well, but if you have occasion to change it:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "6 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>__id__</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>[[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>[[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>3</td>\n",
+       "        <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>4</td>\n",
+       "        <td>[[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>5</td>\n",
+       "        <td>[[1.0, 0.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848]]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0L, [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597]]),\n",
+       " (1L, [[0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]], [[0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597]]),\n",
+       " (2L, [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202]]),\n",
+       " (3L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828]]),\n",
+       " (4L, [[1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025]]),\n",
+       " (5L, [[1.0, 0.0], [1.0, 0.0]], [[-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848]])]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
+    "\n",
+    "SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table\n",
+    "                                     'iris_data_packed',  -- Output table\n",
+    "                                     'class_text',        -- Dependent variable\n",
+    "                                     'attributes',        -- Independent variables\n",
+    "                                     NULL,                -- Grouping\n",
+    "                                     10                   -- Buffer size\n",
+    "                                     );\n",
+    "\n",
+    "SELECT * FROM iris_data_packed ORDER BY __id__;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Review the output summary table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>source_table</th>\n",
+       "        <th>output_table</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "        <th>dependent_vartype</th>\n",
+       "        <th>buffer_size</th>\n",
+       "        <th>class_values</th>\n",
+       "        <th>num_rows_processed</th>\n",
+       "        <th>num_missing_rows_skipped</th>\n",
+       "        <th>grouping_cols</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>iris_data</td>\n",
+       "        <td>iris_data_packed</td>\n",
+       "        <td>class_text</td>\n",
+       "        <td>attributes</td>\n",
+       "        <td>character varying</td>\n",
+       "        <td>10</td>\n",
+       "        <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
+       "        <td>52</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 10, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, None)]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_summary;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4. Grouping\n",
+    "\n",
+    "Run the preprocessor with grouping by state:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "5 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>__id__</th>\n",
+       "        <th>state</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>Alaska</td>\n",
+       "        <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[1.26030711687938, -1.615325368523, 1.10943660794792, 1.24354000843452], [1.10129640587123, -0.126074175104234, 1.2524188915498, 1.05700900716934], [0.306242850830503, -0.977074857057813, 0.680489757142278, 0.497416003373807], [0.942285694863087, -1.615325368523, 0.751980898943218, 0.310885002108629], [0.783274983854942, 0.0866759953841608, 0.894963182545097, 1.24354000843452], [-0.806832126226518, 0.299426165872556, -1.03529764608027, -1.36789400927797], [-0.488810704210227, 1.78867735929132, -0.963806504279335, -1.18136300801279], [-1.60188568126725, 0.512176336360951, -1.17827992968215, -1.18136300801279], [-0.965842837234665, 0.0866759953841608, -1.10678878788121, -0.994832006747614], [-0.647821415218373, 1.15042684782613, -1.17827992968215, -0.994832006747614], [-0.647821415218373, -2.04082570949979, 0.394525189938519, 0.310885002108629], [2.05536067192011, 0.299426165872556, 1.03794546614698, 1.05700900716934], [-0.647821415218373, 0.512176336360951, -1.24
 977107148309, -1.18136300801279]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>Alaska</td>\n",
+       "        <td>[[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[1.41931782788752, 0.724926506849345, 1.2524188915498, 1.4300710096997], [-0.647821415218373, 1.15042684782613, -0.963806504279335, -0.435239002952081], [0.624264272846795, -0.551574516081023, 0.823472040744157, 0.310885002108629], [-1.4428749702591, -1.4025751980346, -1.17827992968215, -0.994832006747614], [0.306242850830503, -0.126074175104234, 0.466016331739459, 0.870478005904162], [1.89634996091196, -0.126074175104234, 1.18092774974886, 0.870478005904162], [-0.32979999320208, -0.551574516081023, 0.680489757142278, 1.05700900716934], [0.46525356183865, -0.338824345592629, 1.10943660794792, 0.870478005904162], [0.306242850830503, 0.0866759953841608, 1.10943660794792, 1.24354000843452], [-0.488810704210227, 0.93767667733774, -1.03529764608027, -1.18136300801279], [-0.488810704210227, 1.78867735929132, -0.749333078876516, -0.808301005482437], [0.147232139822357, 1.15042684782613, -1.17827992968215, -1.18136300801279], [-1.60188568126725, 0.0866759953841608, -1.1
 7827992968215, -1.18136300801279]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>2</td>\n",
+       "        <td>Alaska</td>\n",
+       "        <td>[[0.0, 1.0]]</td>\n",
+       "        <td>[[-0.806832126226518, -1.18982502754621, 0.25154290633664, 0.310885002108629]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>Tennessee</td>\n",
+       "        <td>[[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[-0.0286196553591748, -1.22176567731394, 0.412632633639227, 0.22669394242252], [-0.207492501354026, 1.25994585473, -1.12079945083087, -1.19014319771823], [0.507998882625381, -0.839963903153331, 0.621737008794241, 0.580903227457708], [-0.922983885333435, 0.687243193489089, -1.12079945083087, -1.19014319771823], [1.04461742060994, -0.0763603548321211, 1.03994575910427, 0.935112512492896], [2.11785449657905, 0.114540532248182, 1.10964721748927, 1.11221715501049], [0.507998882625381, -0.649063016073029, 0.552035550409236, 0.580903227457708], [-1.99622096130255, -0.267261241912424, -1.19050090921588, -1.19014319771823], [1.40236311259964, -1.41266656439424, 0.90054284233426, 0.758007869975302], [0.32912603663053, 2.59625206429212, -1.12079945083087, -0.835933912683043], [-0.207492501354026, 1.6417476288906, -1.26020236760088, -0.835933912683043], [-2.1750938072974, -0.0763603548321211, -1.39960528437089, -1.36724784023582], [-0.0286196553591748, -1.22176567731394, 0.
 482334092024232, 0.403798584940115]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>Tennessee</td>\n",
+       "        <td>[[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[0.865744574615085, 0.687243193489089, 0.970244300719264, 1.28932179752808], [-0.0286196553591748, -0.839963903153331, 0.90054284233426, 0.580903227457708], [-1.28072957732314, 0.687243193489089, -1.05109799244587, -1.19014319771823], [-1.10185673132829, 0.114540532248182, -1.12079945083087, -1.36724784023582], [-0.0286196553591748, -1.03086479023363, 0.621737008794241, 0.758007869975302], [-0.207492501354026, -0.0763603548321211, 0.970244300719264, 1.11221715501049], [0.865744574615085, -0.649063016073029, 1.38845305102929, 1.28932179752808], [0.150253190635677, -0.0763603548321211, 0.691438467179245, 0.758007869975302], [0.32912603663053, -0.839963903153331, 0.273229716869218, 0.22669394242252], [-1.28072957732314, -0.0763603548321211, -1.19050090921588, -1.36724784023582], [0.507998882625381, 1.8326485159709, -1.32990382598589, -1.19014319771823], [0.865744574615085, -0.267261241912424, 0.970244300719264, 1.11221715501049]]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0L, u'Alaska', [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0]], [[1.26030711687938, -1.615325368523, 1.10943660794792, 1.24354000843452], [1.10129640587123, -0.126074175104234, 1.2524188915498, 1.05700900716934], [0.306242850830503, -0.977074857057813, 0.680489757142278, 0.497416003373807], [0.942285694863087, -1.615325368523, 0.751980898943218, 0.310885002108629], [0.783274983854942, 0.0866759953841608, 0.894963182545097, 1.24354000843452], [-0.806832126226518, 0.299426165872556, -1.03529764608027, -1.36789400927797], [-0.488810704210227, 1.78867735929132, -0.963806504279335, -1.18136300801279], [-1.60188568126725, 0.512176336360951, -1.17827992968215, -1.18136300801279], [-0.965842837234665, 0.0866759953841608, -1.10678878788121, -0.994832006747614], [-0.647821415218373, 1.15042684782613, -1.17827992968215, -0.994832006747614], [-0.647821415218373, -2.040825709499
 79, 0.394525189938519, 0.310885002108629], [2.05536067192011, 0.299426165872556, 1.03794546614698, 1.05700900716934], [-0.647821415218373, 0.512176336360951, -1.24977107148309, -1.18136300801279]]),\n",
+       " (1L, u'Alaska', [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]], [[1.41931782788752, 0.724926506849345, 1.2524188915498, 1.4300710096997], [-0.647821415218373, 1.15042684782613, -0.963806504279335, -0.435239002952081], [0.624264272846795, -0.551574516081023, 0.823472040744157, 0.310885002108629], [-1.4428749702591, -1.4025751980346, -1.17827992968215, -0.994832006747614], [0.306242850830503, -0.126074175104234, 0.466016331739459, 0.870478005904162], [1.89634996091196, -0.126074175104234, 1.18092774974886, 0.870478005904162], [-0.32979999320208, -0.551574516081023, 0.680489757142278, 1.05700900716934], [0.46525356183865, -0.338824345592629, 1.10943660794792, 0.870478005904162], [0.306242850830503, 0.0866759953841608, 1.10943660794792, 1.24354000843452], [-0.488810704210227, 0.93767667733774, -1.03529764608027, -1.18136300801279], [-0.488810704210227, 1.78867735929132,
  -0.749333078876516, -0.808301005482437], [0.147232139822357, 1.15042684782613, -1.17827992968215, -1.18136300801279], [-1.60188568126725, 0.0866759953841608, -1.17827992968215, -1.18136300801279]]),\n",
+       " (2L, u'Alaska', [[0.0, 1.0]], [[-0.806832126226518, -1.18982502754621, 0.25154290633664, 0.310885002108629]]),\n",
+       " (0L, u'Tennessee', [[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[-0.0286196553591748, -1.22176567731394, 0.412632633639227, 0.22669394242252], [-0.207492501354026, 1.25994585473, -1.12079945083087, -1.19014319771823], [0.507998882625381, -0.839963903153331, 0.621737008794241, 0.580903227457708], [-0.922983885333435, 0.687243193489089, -1.12079945083087, -1.19014319771823], [1.04461742060994, -0.0763603548321211, 1.03994575910427, 0.935112512492896], [2.11785449657905, 0.114540532248182, 1.10964721748927, 1.11221715501049], [0.507998882625381, -0.649063016073029, 0.552035550409236, 0.580903227457708], [-1.99622096130255, -0.267261241912424, -1.19050090921588, -1.19014319771823], [1.40236311259964, -1.41266656439424, 0.90054284233426, 0.758007869975302], [0.32912603663053, 2.59625206429212, -1.12079945083087, -0.835933912683043], [-0.207492501354026, 1.64174762889
 06, -1.26020236760088, -0.835933912683043], [-2.1750938072974, -0.0763603548321211, -1.39960528437089, -1.36724784023582], [-0.0286196553591748, -1.22176567731394, 0.482334092024232, 0.403798584940115]]),\n",
+       " (1L, u'Tennessee', [[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], [[0.865744574615085, 0.687243193489089, 0.970244300719264, 1.28932179752808], [-0.0286196553591748, -0.839963903153331, 0.90054284233426, 0.580903227457708], [-1.28072957732314, 0.687243193489089, -1.05109799244587, -1.19014319771823], [-1.10185673132829, 0.114540532248182, -1.12079945083087, -1.36724784023582], [-0.0286196553591748, -1.03086479023363, 0.621737008794241, 0.758007869975302], [-0.207492501354026, -0.0763603548321211, 0.970244300719264, 1.11221715501049], [0.865744574615085, -0.649063016073029, 1.38845305102929, 1.28932179752808], [0.150253190635677, -0.0763603548321211, 0.691438467179245, 0.758007869975302], [0.32912603663053, -0.839963903153331, 0.273229716869218, 0.22669394242252], [-1.28072957732314, -0.0763603548321211, -1.19050090921588, -1.36724784023582], [0.507998882625381, 1.8326485159709
 , -1.32990382598589, -1.19014319771823], [0.865744574615085, -0.267261241912424, 0.970244300719264, 1.11221715501049]])]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
+    "\n",
+    "SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table\n",
+    "                                     'iris_data_packed',  -- Output table\n",
+    "                                     'class_text',        -- Dependent variable\n",
+    "                                     'attributes',        -- Independent variables\n",
+    "                                     'state'              -- Grouping\n",
+    "                                     );\n",
+    "\n",
+    "SELECT * FROM iris_data_packed ORDER BY state, __id__;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Review the output summary table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>source_table</th>\n",
+       "        <th>output_table</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "        <th>dependent_vartype</th>\n",
+       "        <th>buffer_size</th>\n",
+       "        <th>class_values</th>\n",
+       "        <th>num_rows_processed</th>\n",
+       "        <th>num_missing_rows_skipped</th>\n",
+       "        <th>grouping_cols</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>iris_data</td>\n",
+       "        <td>iris_data_packed</td>\n",
+       "        <td>class_text</td>\n",
+       "        <td>attributes</td>\n",
+       "        <td>character varying</td>\n",
+       "        <td>13</td>\n",
+       "        <td>[u'Iris_setosa', u'Iris_versicolor']</td>\n",
+       "        <td>52</td>\n",
+       "        <td>0</td>\n",
+       "        <td>state</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'iris_data', u'iris_data_packed', u'class_text', u'attributes', u'character varying', 13, [u'Iris_setosa', u'Iris_versicolor'], 52, 0, u'state')]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_summary;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Review the output standardization table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>state</th>\n",
+       "        <th>mean</th>\n",
+       "        <th>std</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>Alaska</td>\n",
+       "        <td>[5.40740740740741, 2.95925925925926, 2.94814814814815, 0.833333333333333]</td>\n",
+       "        <td>[0.628888452645665, 0.470034875978888, 1.39877469405147, 0.536103914747325]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>Tennessee</td>\n",
+       "        <td>[5.516, 3.04, 3.108, 0.872]</td>\n",
+       "        <td>[0.55905634778617, 0.523832034148353, 1.43469021046357, 0.564637937088893]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'Alaska', [5.40740740740741, 2.95925925925926, 2.94814814814815, 0.833333333333333], [0.628888452645665, 0.470034875978888, 1.39877469405147, 0.536103914747325]),\n",
+       " (u'Tennessee', [5.516, 3.04, 3.108, 0.872], [0.55905634778617, 0.523832034148353, 1.43469021046357, 0.564637937088893])]"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_standardization ORDER BY state;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 5.  Integer dependent variable for classification\n",
+    "\n",
+    "If the dependent variable is a scalar integer, and you have not already encoded it, you can ask the preprocessor to encode it for you:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Done.\n",
+      "1 rows affected.\n",
+      "2 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>__id__</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>0</td>\n",
+       "        <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]</td>\n",
+       "        <td>[[0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.0995580974152422, 1.4087414919645, -1.07515071152907, -1.18456909732025], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.234443261629389, 0.003859565725
 25086, 1.03989986852812, 1.17758048907675], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.568444620674023, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [0.56844462067
 4023, -0.798930106411465, 0.687391438518589, 0.632469046062059]]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>1</td>\n",
+       "        <td>[[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]]</td>\n",
+       "        <td>[[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [1.4034480182856, 0.605951819827788, 1.18090324053193, 1.35928430341498], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [1.06944665924097, 0.00385956572525086, 1.110401
 55453003, 0.995876674738521], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [2.07145073637487, 0.20455698375943, 1.18090324053193, 1.17758048907675], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [-0.600560135982193, 1.60943890
 999868, -0.793143967521448, -0.821161468643789]]</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(0L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]], [[0.902445979718656, -1.60171977854818, 0.687391438518589, 0.269061417385597], [0.401443941151707, -0.798930106411465, 0.334883008509056, 0.269061417385597], [0.568444620674023, -0.598232688377286, 0.757893124520495, 0.269061417385597], [0.0674425821070736, -1.20032494247982, 0.475886380512869, 0.269061417385597], [-0.934561495026824, -1.20032494247982, 0.193879636505243, 0.269061417385597], [-1.76956489263841, 0.405254401793609, -1.21615408353289, -1.18456909732025], [0.568444620674023, -0.598232688377286, 0.616889752516682, 0.632469046062059], [-0.767560815504508, 1.00734665589615, -1.21615408353289, -1.00286528298202], [-0.0995580974152422, 1.4087414919645, 
 -1.07515071152907, -1.18456909732025], [-0.767560815504508, 1.00734665589615, -1.00464902552717, -0.457753839967327], [-0.600560135982193, 1.60943890999868, -1.00464902552717, -1.18456909732025], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [0.234443261629389, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [-0.0995580974152422, 1.81013632803286, -1.21615408353289, -0.821161468643789], [-1.76956489263841, 0.00385956572525086, -1.21615408353289, -1.18456909732025], [-1.60256421311609, -1.401022360514, -1.21615408353289, -1.00286528298202], [-1.10156217454914, 0.806649237861967, -1.00464902552717, -1.18456909732025], [-0.767560815504508, 0.405254401793609, -1.28665576953479, -1.18456909732025], [-1.76956489263841, -0.196837852308928, -1.14565239753098, -1.18456909732025], [-1.93656557216072, 0.00385956572525086, -1.3571574555367, -1.36627291165848], [1.06944665924097, -0.196837852308928, 1.18090324053193, 0.995876674738521], [0.56844462067402
 3, 2.01083374606704, -1.28665576953479, -1.18456909732025], [0.401443941151707, 2.81362341820376, -1.07515071152907, -0.821161468643789], [0.0674425821070736, -0.999627524445644, 0.687391438518589, 0.81417286040029], [0.902445979718656, -0.196837852308928, 1.03989986852812, 1.17758048907675], [0.568444620674023, -0.798930106411465, 0.687391438518589, 0.632469046062059]]),\n",
+       " (1L, [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]], [[0.902445979718656, 0.806649237861967, 1.03989986852812, 1.35928430341498], [0.902445979718656, -0.598232688377286, 1.46290998453956, 1.35928430341498], [-0.0995580974152422, 0.00385956572525086, 1.03989986852812, 1.17758048907675], [0.234443261629389, 0.00385956572525086, 0.757893124520495, 0.81417286040029], [0.0674425821070736, -1.20032494247982, 0.546388066514775, 0.450765231723828], [1.23644733876329, -1.60171977854818, 1.03989986852812, 1.17758048907675], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.00286528298202], [1.4034480182856, -1.401022360514, 0.969398182526215, 0.81417286040029], [1.4034480182856, 0.605951819827788, 1.1809032405
 3193, 1.35928430341498], [-0.600560135982193, 0.806649237861967, -1.07515071152907, -1.18456909732025], [0.401443941151707, -0.397535270343108, 1.03989986852812, 0.81417286040029], [-0.767560815504508, 0.806649237861967, -1.07515071152907, -1.18456909732025], [1.06944665924097, 0.00385956572525086, 1.11040155453003, 0.995876674738521], [0.234443261629389, -0.999627524445644, 0.616889752516682, 0.450765231723828], [0.0674425821070736, 1.00734665589615, -1.21615408353289, -1.18456909732025], [2.07145073637487, 0.20455698375943, 0.969398182526215, 0.995876674738521], [0.73544530019634, 0.00385956572525086, 0.828394810522402, 1.17758048907675], [0.234443261629389, -0.196837852308928, 0.405384694510963, 0.81417286040029], [-0.767560815504508, -2.00311461461654, 0.334883008509056, 0.269061417385597], [1.90445005685255, -0.196837852308928, 1.11040155453003, 0.81417286040029], [-0.934561495026824, 0.20455698375943, -1.07515071152907, -1.36627291165848], [2.07145073637487, 0.20455698375943, 
 1.18090324053193, 1.17758048907675], [0.0674425821070736, -0.798930106411465, 0.969398182526215, 0.632469046062059], [-0.433559456459875, -0.598232688377286, 0.616889752516682, 0.995876674738521], [-1.10156217454914, 0.00385956572525086, -1.14565239753098, -1.36627291165848], [-0.600560135982193, 1.60943890999868, -0.793143967521448, -0.821161468643789]])]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "DROP TABLE IF EXISTS iris_data_packed, iris_data_packed_summary, iris_data_packed_standardization;\n",
+    "\n",
+    "SELECT madlib.minibatch_preprocessor('iris_data',         -- Source table\n",
+    "                                     'iris_data_packed',  -- Output table\n",
+    "                                     'class',             -- Integer dependent variable\n",
+    "                                     'attributes',        -- Independent variables\n",
+    "                                     NULL,                -- Grouping\n",
+    "                                     NULL,                -- Buffer size\n",
+    "                                     TRUE                 -- Encode scalar int dependent variable\n",
+    "                                     );\n",
+    "\n",
+    "SELECT * FROM iris_data_packed ORDER BY __id__;"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Review the output summary table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1 rows affected.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<table>\n",
+       "    <tr>\n",
+       "        <th>source_table</th>\n",
+       "        <th>output_table</th>\n",
+       "        <th>dependent_varname</th>\n",
+       "        <th>independent_varname</th>\n",
+       "        <th>dependent_vartype</th>\n",
+       "        <th>buffer_size</th>\n",
+       "        <th>class_values</th>\n",
+       "        <th>num_rows_processed</th>\n",
+       "        <th>num_missing_rows_skipped</th>\n",
+       "        <th>grouping_cols</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "        <td>iris_data</td>\n",
+       "        <td>iris_data_packed</td>\n",
+       "        <td>class</td>\n",
+       "        <td>attributes</td>\n",
+       "        <td>integer</td>\n",
+       "        <td>26</td>\n",
+       "        <td>[1, 2]</td>\n",
+       "        <td>52</td>\n",
+       "        <td>0</td>\n",
+       "        <td>None</td>\n",
+       "    </tr>\n",
+       "</table>"
+      ],
+      "text/plain": [
+       "[(u'iris_data', u'iris_data_packed', u'class', u'attributes', u'integer', 26, [1, 2], 52, 0, None)]"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%%sql\n",
+    "SELECT * FROM iris_data_packed_summary;"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}