You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/06/16 20:58:01 UTC
[24/34] incubator-madlib git commit: Array Operations: Unnest 2-D
arrays by one level.
Array Operations: Unnest 2-D arrays by one level.
JIRA: MADLIB-1086
Unnest 2-D arrays by one level (i.e. into rows of 1-D arrays).
Example usage in k-Means shows how to unnest the 2-D centroid array
to get one centroid per row for follow on operations.
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/3af18a93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/3af18a93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/3af18a93
Branch: refs/heads/latest_release
Commit: 3af18a9388d144920d5bca3e5cde27edee6e0eac
Parents: 20b1158
Author: Rashmi Raghu <rr...@pivotal.io>
Authored: Tue Apr 25 14:41:09 2017 -0700
Committer: Rashmi Raghu <rr...@pivotal.io>
Committed: Wed Apr 26 11:35:23 2017 -0700
----------------------------------------------------------------------
methods/array_ops/src/pg_gp/array_ops.sql_in | 102 ++++++++-
.../array_ops/src/pg_gp/test/array_ops.sql_in | 218 +++++++++++++++++++
src/ports/postgres/modules/kmeans/kmeans.sql_in | 89 +++++---
3 files changed, 375 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/3af18a93/methods/array_ops/src/pg_gp/array_ops.sql_in
----------------------------------------------------------------------
diff --git a/methods/array_ops/src/pg_gp/array_ops.sql_in b/methods/array_ops/src/pg_gp/array_ops.sql_in
index c83a947..08ba377 100644
--- a/methods/array_ops/src/pg_gp/array_ops.sql_in
+++ b/methods/array_ops/src/pg_gp/array_ops.sql_in
@@ -24,7 +24,7 @@ m4_include(`SQLCommon.m4')
@brief Provides fast array operations supporting other MADlib modules.
-This module provides a set of basic array operations implemented in C.
+This module provides a set of basic array operations implemented in C and SQL.
It is a support module for several machine learning algorithms that
require fast array operations.
@@ -42,6 +42,8 @@ These functions support several numeric types:
- DOUBLE PRECISION (FLOAT8)
- NUMERIC (internally casted into FLOAT8, loss of precisions can happen)
+Additionally, array_unnest_2d_to_1d() supports other data types such as TEXT or VARCHAR.
+
Several of the function require NO NULL VALUES, while others omit NULLs and return results. See details in description of individual functions.
@anchor list
@@ -126,6 +128,11 @@ Several of the function require NO NULL VALUES, while others omit NULLs and retu
<tr><th>normalize()</th><td> This function normalizes an array as sum of squares to be 1. It requires that the array is 1-D and all the values are NON-NULL.
</td></tr>
+
+<tr><th>array_unnest_2d_to_1d()</th><td> This function takes a 2-D array as the input and unnests it by one level. It returns a set of 1-D arrays that correspond to rows of
+ the input array as well as an ID column with values corresponding to row positions occupied by those 1-D arrays within the 2-D array.
+</td></tr>
+
</table>
@anchor examples
@@ -220,6 +227,30 @@ Result:
{1.3,1.3,1.3,1.3,1.3,1.3,1.3,1.3,1.3}
(1 row)
</pre>
+-# Unnest a column of 2-D arrays into sets of 1-D arrays.
+<pre class="example">
+SELECT id, (madlib.array_unnest_2d_to_1d(val)).*
+FROM (
+ SELECT 1::INT AS id, ARRAY[[1.3,2.0,3.2],[10.3,20.0,32.2]]::FLOAT8[][] AS val
+ UNION ALL
+ SELECT 2, ARRAY[[pi(),pi()/2],[2*pi(),pi()],[pi()/4,4*pi()]]::FLOAT8[][]
+) t
+ORDER BY 1,2;
+</pre>
+Result:
+<pre class="result">
+ id | unnest_row_id | unnest_result
+----+---------------+--------------------------------------
+ 1 | 1 | {1.3,2,3.2}
+ 1 | 2 | {10.3,20,32.2}
+ 2 | 1 | {3.14159265358979,1.5707963267949}
+ 2 | 2 | {6.28318530717959,3.14159265358979}
+ 2 | 3 | {0.785398163397448,12.5663706143592}
+(5 rows)
+</pre>
+If the function is called without the .* notation then it will return a
+composite record type with two attributes: the row ID and corresponding
+unnested array result.
@anchor related
@par Related Topics
@@ -636,3 +667,72 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_cum_prod(x anyarray) RETURNS anya
AS 'MODULE_PATHNAME', 'array_cum_prod'
LANGUAGE C IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
+
+/**
+ * @brief This function takes a 2-D array as the input and unnests it
+ * by one level.
+ * It returns a set of 1-D arrays that correspond to rows of the
+ * input array as well as an ID column containing row positions occupied by
+ * those 1-D arrays within the 2-D array (the ID column values start with
+ * 1 and not 0)
+ *
+ * @param x Array x
+ * @returns Set of 1-D arrays that corrspond to rows of x and an ID column.
+ *
+ */
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_unnest_2d_to_1d(
+ x ANYARRAY,
+ OUT unnest_row_id INT,
+ OUT unnest_result ANYARRAY
+)
+RETURNS SETOF RECORD
+AS
+$BODY$
+ SELECT t2.r::int, array_agg($1[t2.r][t2.c] order by t2.c) FROM
+ (
+ SELECT generate_series(array_lower($1,2),array_upper($1,2)) as c, t1.r
+ FROM
+ (
+ SELECT generate_series(array_lower($1,1),array_upper($1,1)) as r
+ ) t1
+ ) t2
+GROUP BY t2.r
+$BODY$ LANGUAGE SQL IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_unnest_2d_to_1d()
+RETURNS TEXT AS $$
+ return """
+------------------------------------------------------------------
+ SUMMARY
+------------------------------------------------------------------
+This function takes a 2-D array as the input and unnests it by
+one level.
+It returns a set of 1-D arrays that correspond to rows of the
+input array as well as an ID column containing row positions occupied by
+those 1-D arrays within the 2-D array (the ID column values start with
+1 and not 0).
+
+------------------------------------------------------------------
+ USAGE
+------------------------------------------------------------------
+
+ SELECT ({schema_madlib}.array_unnest_2d_to_1d(input_array)).* from input_table;
+
+If the function is called without the .* notation then it will return a
+composite record type with two attributes: the row ID and corresponding
+unnested array result.
+
+------------------------------------------------------------------
+ EXAMPLE
+------------------------------------------------------------------
+SELECT id, (madlib.array_unnest_2d_to_1d(val)).*
+FROM (
+ SELECT 1::INT AS id, ARRAY[[1.3,2.0,3.2],[10.3,20.0,32.2]]::FLOAT8[][] AS val
+ UNION ALL
+ SELECT 2, ARRAY[[pi(),pi()/2],[2*pi(),pi()],[pi()/4,4*pi()]]::FLOAT8[][]
+) t
+ORDER BY 1,2;
+ """.format(schema_madlib='MADLIB_SCHEMA')
+$$ LANGUAGE PLPYTHONU IMMUTABLE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/3af18a93/methods/array_ops/src/pg_gp/test/array_ops.sql_in
----------------------------------------------------------------------
diff --git a/methods/array_ops/src/pg_gp/test/array_ops.sql_in b/methods/array_ops/src/pg_gp/test/array_ops.sql_in
index 473e32e..b05d0b7 100644
--- a/methods/array_ops/src/pg_gp/test/array_ops.sql_in
+++ b/methods/array_ops/src/pg_gp/test/array_ops.sql_in
@@ -89,3 +89,221 @@ SELECT array_scalar_mult(
(1.0/MADLIB_SCHEMA.array_sum(ARRAY[1.,2,3,4]))
);
+--------------------------------------------------------------
+-- TESTING array_unnest_2d_to_1d FUNCTION
+--------------------------------------------------------------
+-- 2-element float8 arrays
+DROP TABLE IF EXISTS unnest_2d_tbl01;
+CREATE TABLE unnest_2d_tbl01 (id INT, val DOUBLE PRECISION[][]);
+INSERT INTO unnest_2d_tbl01 VALUES
+ (1, ARRAY[[1::float8,2],[3::float8,4],[5::float8,6]]),
+ (2, ARRAY[[101::float8,202],[303::float8,404],[505::float8,606]])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl01_groundtruth;
+CREATE TABLE unnest_2d_tbl01_groundtruth (
+ id INT,
+ unnest_row_id INT,
+ val DOUBLE PRECISION[]
+);
+INSERT INTO unnest_2d_tbl01_groundtruth VALUES
+ (1, 1, ARRAY[1::float8,2]),
+ (1, 2, ARRAY[3::float8,4]),
+ (1, 3, ARRAY[5::float8,6]),
+ (2, 1, ARRAY[101::float8,202]),
+ (2, 2, ARRAY[303::float8,404]),
+ (2, 3, ARRAY[505::float8,606])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl01_out;
+CREATE TABLE unnest_2d_tbl01_out AS
+ SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl01;
+
+SELECT assert(
+ unnest_result = val,
+ 'array_unnest_2d_to_1d: Wrong results for test table "unnest_2d_tbl01"'
+)
+FROM (
+ SELECT * FROM
+ unnest_2d_tbl01_out t1
+ JOIN
+ unnest_2d_tbl01_groundtruth t2
+ USING (id,unnest_row_id)
+) t3;
+
+-- 3-element float8 arrays
+DROP TABLE IF EXISTS unnest_2d_tbl02;
+CREATE TABLE unnest_2d_tbl02 (id INT, val DOUBLE PRECISION[][]);
+INSERT INTO unnest_2d_tbl02 VALUES
+ (1, ARRAY[[1.57::float8,2,3],[4::float8,5,6]]),
+ (2, ARRAY[[101::float8,202,303],[PI(),505,606]]),
+ (3, ARRAY[[1011::float8,2022,3033],[4044,5055,60.66]])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl02_groundtruth;
+CREATE TABLE unnest_2d_tbl02_groundtruth (
+ id INT,
+ unnest_row_id INT,
+ val DOUBLE PRECISION[]
+);
+INSERT INTO unnest_2d_tbl02_groundtruth VALUES
+ (1, 1, array[1.57::float8,2,3]),
+ (1, 2, array[4::float8,5,6]),
+ (2, 1, array[101::float8,202,303]),
+ (2, 2, array[pi(),505,606]),
+ (3, 1, array[1011::float8,2022,3033]),
+ (3, 2, array[4044,5055,60.66])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl02_out;
+CREATE TABLE unnest_2d_tbl02_out AS
+ SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl02;
+
+SELECT assert(
+ unnest_result = val,
+ 'array_unnest_2d_to_1d: Wrong results for test table "unnest_2d_tbl02"'
+)
+FROM (
+ SELECT * FROM
+ unnest_2d_tbl02_out t1
+ JOIN
+ unnest_2d_tbl02_groundtruth t2
+ USING (id,unnest_row_id)
+) t3;
+
+-- 2-element text arrays
+DROP TABLE IF EXISTS unnest_2d_tbl03;
+CREATE TABLE unnest_2d_tbl03 (id INT, val TEXT[][]);
+INSERT INTO unnest_2d_tbl03 VALUES
+ (1, ARRAY[['a','b'],['c','d'],['e','f']]),
+ (2, ARRAY[['apple','banana'],['cherries','kiwi'],['lemon','mango']])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl03_groundtruth;
+CREATE TABLE unnest_2d_tbl03_groundtruth (
+ id INT,
+ unnest_row_id INT,
+ val TEXT[]
+);
+INSERT INTO unnest_2d_tbl03_groundtruth VALUES
+ (1, 1, ARRAY['a','b']),
+ (1, 2, ARRAY['c','d']),
+ (1, 3, ARRAY['e','f']),
+ (2, 1, ARRAY['apple','banana']),
+ (2, 2, ARRAY['cherries','kiwi']),
+ (2, 3, ARRAY['lemon','mango'])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl03_out;
+CREATE TABLE unnest_2d_tbl03_out AS
+ SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl03;
+
+SELECT assert(
+ unnest_result = val,
+ 'array_unnest_2d_to_1d: Wrong results for test table "unnest_2d_tbl03"'
+)
+FROM (
+ SELECT * FROM
+ unnest_2d_tbl03_out t1
+ JOIN
+ unnest_2d_tbl03_groundtruth t2
+ USING (id,unnest_row_id)
+) t3;
+
+-- 3-element float8 arrays with some NULLs
+DROP TABLE IF EXISTS unnest_2d_tbl04;
+CREATE TABLE unnest_2d_tbl04 (id INT, val DOUBLE PRECISION[][]);
+INSERT INTO unnest_2d_tbl04 VALUES
+ (1, ARRAY[[1::float8,NULL,3],[4.0,5,NULL]]),
+ (2, ARRAY[[101::float8,NULL,303],
+ [NULL::float8,NULL,NULL]]::double precision[][]),
+ (3, ARRAY[[NULL,2022::float8],[4044::float8,NULL]])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl04_groundtruth;
+CREATE TABLE unnest_2d_tbl04_groundtruth (
+ id INT,
+ unnest_row_id INT,
+ val DOUBLE PRECISION[]
+);
+INSERT INTO unnest_2d_tbl04_groundtruth VALUES
+ (1, 1, ARRAY[1::float8,NULL,3]),
+ (1, 2, ARRAY[4.0::float8,5,NULL]),
+ (2, 1, ARRAY[101::float8,NULL,303]),
+ (2, 2, ARRAY[NULL::float8,NULL,NULL]),
+ (3, 1, ARRAY[NULL,2022::float8]),
+ (3, 2, ARRAY[4044::float8,NULL])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl04_out;
+CREATE TABLE unnest_2d_tbl04_out AS
+ SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl04;
+
+SELECT assert(
+ unnest_result = val,
+ 'array_unnest_2d_to_1d: Wrong results for test table "unnest_2d_tbl04"'
+)
+FROM (
+ SELECT * FROM
+ unnest_2d_tbl04_out t1
+ JOIN
+ unnest_2d_tbl04_groundtruth t2
+ USING (id,unnest_row_id)
+) t3;
+
+-- 3-element timestamp arrays with NULLs
+DROP TABLE IF EXISTS unnest_2d_tbl05;
+CREATE TABLE unnest_2d_tbl05 (id INT, val TIMESTAMP WITHOUT TIME ZONE[][]);
+INSERT INTO unnest_2d_tbl05 VALUES
+ (1, array[['2017-01-01 11:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2017-01-01 13:00:05',
+ '2017-01-02 11:55:00'],
+ ['2016-10-12 12:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2016-10-12 13:15:22',
+ NULL]]),
+ (2, NULL),
+ (3, array[['2014-02-01 11:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2014-02-01 13:00:05',
+ '2014-02-02 11:55:00'],
+ ['2013-07-12 12:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ NULL,
+ '2013-07-12 13:15:22']])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl05_groundtruth;
+CREATE TABLE unnest_2d_tbl05_groundtruth (
+ id INT,
+ unnest_row_id INT,
+ val TIMESTAMP WITHOUT TIME ZONE[]
+);
+INSERT INTO unnest_2d_tbl05_groundtruth VALUES
+ (1, 1, ARRAY['2017-01-01 11:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2017-01-01 13:00:05',
+ '2017-01-02 11:55:00']),
+ (1, 2, ARRAY['2016-10-12 12:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2016-10-12 13:15:22',
+ NULL]),
+ (2, NULL, NULL),
+ (3, 1, ARRAY['2014-02-01 11:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ '2014-02-01 13:00:05',
+ '2014-02-02 11:55:00']),
+ (3, 2, ARRAY['2013-07-12 12:00:02'::TIMESTAMP WITHOUT TIME ZONE,
+ NULL,
+ '2013-07-12 13:15:22'])
+;
+
+DROP TABLE IF EXISTS unnest_2d_tbl05_out;
+CREATE TABLE unnest_2d_tbl05_out AS
+ SELECT id, (array_unnest_2d_to_1d(val)).* FROM unnest_2d_tbl05;
+
+SELECT assert(
+ unnest_result = val,
+ 'array_unnest_2d_to_1d: Wrong results for test table "unnest_2d_tbl05"'
+)
+FROM (
+ SELECT * FROM
+ unnest_2d_tbl05_out t1
+ JOIN
+ unnest_2d_tbl05_groundtruth t2
+ USING (id,unnest_row_id)
+) t3;
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/3af18a93/src/ports/postgres/modules/kmeans/kmeans.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/kmeans/kmeans.sql_in b/src/ports/postgres/modules/kmeans/kmeans.sql_in
index f689dd6..b3cdd55 100644
--- a/src/ports/postgres/modules/kmeans/kmeans.sql_in
+++ b/src/ports/postgres/modules/kmeans/kmeans.sql_in
@@ -239,75 +239,98 @@ INSERT INTO km_sample VALUES
</pre>
-# Run k-means clustering using kmeans++ for centroid seeding:
<pre class="example">
+DROP TABLE IF EXISTS km_result;
+-- Run kmeans algorithm
+CREATE TABLE km_result AS
+SELECT * FROM madlib.kmeanspp('km_sample', 'points', 2,
+ 'madlib.squared_dist_norm2',
+ 'madlib.avg', 20, 0.001);
\\x on;
-SELECT * FROM madlib.kmeanspp( 'km_sample', -- Table of source data
- 'points', -- Column containing point co-ordinates
- 2, -- Number of centroids to calculate
- 'madlib.squared_dist_norm2', -- Distance function
- 'madlib.avg', -- Aggregate function
- 20, -- Number of iterations
- 0.001 -- Fraction of centroids reassigned to keep iterating
- );
+SELECT * FROM km_result;
</pre>
Result:
<pre class="result">
-centroids | {{13.7533333333333,1.905,2.425,16.0666666666667,90.3333333333333,2.805,2.98,0.29,2.005,5.40663333333333,1.04166666666667, 3.31833333333333,1020.83333333333},
- {14.255,1.9325,2.5025,16.05,110.5,3.055,2.9775,0.2975,1.845,6.2125,0.9975,3.365,1378.75}}
-cluster_variance | {122999.110416013,30561.74805}
-objective_fn | 153560.858466013
+centroids | {{14.036,2.018,2.536,16.56,108.6,3.004,3.03,0.298,2.038,6.10598,1.004,3.326,1340},{13.872,1.814,2.376,15.56,88.2,2.806,2.928,0.288,1.844,5.35198,1.044,3.348,988}}
+cluster_variance | {60672.638245208,90512.324426408}
+objective_fn | 151184.962671616
frac_reassigned | 0
-num_iterations | 3
+num_iterations | 2
</pre>
-# Calculate the simplified silhouette coefficient:
<pre class="example">
SELECT * FROM madlib.simple_silhouette( 'km_sample',
'points',
- (SELECT centroids FROM
- madlib.kmeanspp('km_sample',
- 'points',
- 2,
- 'madlib.squared_dist_norm2',
- 'madlib.avg',
- 20,
- 0.001)),
+ (SELECT centroids FROM km_result),
'madlib.dist_norm2'
);
</pre>
Result:
<pre class="result">
-simple_silhouette | 0.686314347664694
+simple_silhouette | 0.68978804882941
</pre>
-# Find the cluster assignment for each point:
<pre class="example">
\\x off;
-DROP TABLE IF EXISTS km_result;
--- Run kmeans algorithm
-CREATE TABLE km_result AS
-SELECT * FROM madlib.kmeanspp('km_sample', 'points', 2,
- 'madlib.squared_dist_norm2',
- 'madlib.avg', 20, 0.001);
-- Get point assignment
SELECT data.*, (madlib.closest_column(centroids, points)).column_id as cluster_id
FROM km_sample as data, km_result
ORDER BY data.pid;
</pre>
+Result:
<pre class="result">
- pid | points | cluster_id
+ pid | points | cluster_id
-----+--------------------------------------------------------------------+------------
- 1 | {14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065} | 0
- 2 | {13.2,1.78,2.14,11.2,1,2.65,2.76,0.26,1.28,4.38,1.05,3.49,1050} | 0
+ 1 | {14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065} | 1
+ 2 | {13.2,1.78,2.14,11.2,1,2.65,2.76,0.26,1.28,4.38,1.05,3.49,1050} | 1
3 | {13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.6799,1.03,3.17,1185} | 0
4 | {14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480} | 0
5 | {13.24,2.59,2.87,21,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735} | 1
6 | {14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450} | 0
7 | {14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290} | 0
8 | {14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295} | 0
- 9 | {14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045} | 0
- 10 | {13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.2199,1.01,3.55,1045} | 0
+ 9 | {14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045} | 1
+ 10 | {13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.2199,1.01,3.55,1045} | 1
(10 rows)
</pre>
+-# Unnest the cluster centroids 2-D array to get a set of 1-D centroid arrays:
+<pre class="example">
+DROP TABLE IF EXISTS km_centroids_unnest;
+-- Run unnest function
+CREATE TABLE km_centroids_unnest AS
+SELECT (madlib.array_unnest_2d_to_1d(centroids)).*
+FROM km_result;
+SELECT * FROM km_centroids_unnest ORDER BY 1;
+</pre>
+Result:
+<pre class="result">
+ unnest_row_id | unnest_result
+---------------+----------------------------------------------------------------------------------
+ 1 | {14.036,2.018,2.536,16.56,108.6,3.004,3.03,0.298,2.038,6.10598,1.004,3.326,1340}
+ 2 | {13.872,1.814,2.376,15.56,88.2,2.806,2.928,0.288,1.844,5.35198,1.044,3.348,988}
+(2 rows)
+</pre>
+Note that the ID column returned by array_unnest_2d_to_1d()
+is not guaranteed to be the same as the cluster ID assigned by k-means.
+See below to create the correct cluster IDs.
+
+-# Create cluster IDs for 1-D centroid arrays so that cluster ID for any centroid
+can be matched to the cluster assignment for the data points:
+<pre class="example">
+SELECT cent.*, (madlib.closest_column(centroids, unnest_result)).column_id as cluster_id
+FROM km_centroids_unnest as cent, km_result
+ORDER BY cent.unnest_row_id;
+</pre>
+Result:
+<pre class="result">
+ unnest_row_id | unnest_result | cluster_id
+---------------+----------------------------------------------------------------------------------+------------
+ 1 | {14.036,2.018,2.536,16.56,108.6,3.004,3.03,0.298,2.038,6.10598,1.004,3.326,1340} | 0
+ 2 | {13.872,1.814,2.376,15.56,88.2,2.806,2.928,0.288,1.844,5.35198,1.044,3.348,988} | 1
+(2 rows)
+</pre>
+
-# Run the same example as above, but using array input. Create the input table:
<pre class="example">
DROP TABLE IF EXISTS km_arrayin CASCADE;