You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ok...@apache.org on 2017/06/16 20:57:47 UTC
[10/34] incubator-madlib git commit: Pivot: Add support for array
output
Pivot: Add support for array output
JIRA: MADLIB-1066
When the total number of pivoted columns exceeds the PostgreSQL limit
(250 - 1600 depending on the type of columns), an array output becomes
essential. This commit adds support to get each pivoted set of columns
(all columns related to a particular value-aggregate combination) as an
array. There is also support for getting the output as madlib.svec.
Closes #108
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/bb209bbb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/bb209bbb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/bb209bbb
Branch: refs/heads/latest_release
Commit: bb209bbb6e081a2838a3f698947529358792e47f
Parents: 6b466ea
Author: Rahul Iyer <ri...@apache.org>
Authored: Fri Mar 31 11:15:58 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Fri Mar 31 11:15:58 2017 -0700
----------------------------------------------------------------------
.../postgres/modules/utilities/pivot.py_in | 238 +++++++++-----
.../postgres/modules/utilities/pivot.sql_in | 327 ++++++++-----------
.../modules/utilities/test/pivot.sql_in | 74 +++--
.../postgres/modules/utilities/utilities.py_in | 13 +-
.../modules/utilities/validate_args.py_in | 4 +-
5 files changed, 354 insertions(+), 302 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/pivot.py_in b/src/ports/postgres/modules/utilities/pivot.py_in
index 7e342f1..6d0ebae 100644
--- a/src/ports/postgres/modules/utilities/pivot.py_in
+++ b/src/ports/postgres/modules/utilities/pivot.py_in
@@ -42,6 +42,7 @@ from validate_args import columns_exist_in_table
from validate_args import table_is_empty
from validate_args import _get_table_schema_names
from validate_args import get_first_schema
+from validate_args import get_expr_type
m4_changequote(`<!', `!>')
@@ -49,14 +50,14 @@ m4_changequote(`<!', `!>')
def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
pivot_values, aggregate_func=None, fill_value=None, keep_null=False,
- output_col_dictionary=False, **kwargs):
+ output_col_dictionary=False, output_type=None, **kwargs):
"""
Helper function that can be used to pivot tables
Args:
@param source_table The original data table
@param out_table The output table that contains the dummy
variable columns
- @param index The index columns to group by the records by
+ @param index The index columns to group the records by
@param pivot_cols The columns to pivot the table
@param pivot_values The value columns to be summarized in the
pivoted table
@@ -80,6 +81,16 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
FROM pivset GROUP BY id ORDER BY id)
"""
+ def _fill_value_wrapper(sel_str):
+ """ Wrap a given SQL SELECT statement with COALESCE using a given fill value.
+
+ No-op if the fill value is not provided
+ """
+ if fill_value is not None:
+ return " COALESCE({0}, {1}) ".format(sel_str, fill_value)
+ else:
+ return sel_str
+
with MinWarning('warning'):
# If there are more than 1000 columns for the output table, we give a
@@ -93,6 +104,25 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
indices = split_quoted_delimited_str(index)
pcols = split_quoted_delimited_str(pivot_cols)
pvals = split_quoted_delimited_str(pivot_values)
+
+ # output type for specific supported types
+ output_type = 'column' if not output_type else output_type.lower()
+ all_output_types = sorted(['array', 'column', 'svec'])
+ try:
+ # allow user to specify a prefix substring of
+ # supported output types. This works because the supported
+ # output types have unique prefixes.
+ output_type = next(s for s in all_output_types
+ if s.startswith(output_type))
+ except StopIteration:
+ # next() returns a StopIteration if no element found
+ plpy.error("Encoding categorical: Output type should be one of {0}".
+ format(','.join(all_output_types)))
+
+ is_array_output = output_type in ('array', 'svec')
+ # always build dictionary table if output is array
+ output_col_dictionary = True if is_array_output else output_col_dictionary
+
validate_pivot_coding(source_table, out_table, indices, pcols, pvals)
# Strip the end quotes for building output columns (this can only be
@@ -104,6 +134,8 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
# value column.
agg_dict = parse_aggregates(pvals, aggregate_func)
+ validate_output_types(source_table, agg_dict, is_array_output)
+
# Find the distinct values of pivot_cols
array_agg_str = ', '.join("array_agg(DISTINCT {pcol}) AS {pcol}_values".
format(pcol=pcol) for pcol in pcols)
@@ -143,7 +175,7 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
for pcol in pcols])))
# Check the max possible length of a output column name
- # If it is over 63 (psql upper limit) create table lookup
+ # If it is over 63 (postgresql upper limit) create dictionary lookup
for pval in pvals:
agg_func = agg_dict[pval]
# Length calculation: value column length + aggregate length +
@@ -159,122 +191,157 @@ def pivot(schema_madlib, source_table, out_table, index, pivot_cols,
format(**locals()))
output_col_dictionary = True
- # Create the output dictionary if needed
+ # Types of pivot columns are needed for building the right columns
+ # in the dictionary table and to decide if a pivot column value needs to
+ # be quoted during comparison (will be quoted if it's a text column)
+ types_str = ', '.join("pg_typeof(\"{pcol}\") as {pcol}".
+ format(pcol=p) for p in pcols)
+ pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1".
+ format(types_str, source_table))[0]
if output_col_dictionary:
out_dict = out_table + "_dictionary"
_assert(not table_exists(out_dict),
"Pivot: Output dictionary table already exists!")
-
- # Collect the types for pivot columns
- types_str = ','.join("pg_typeof(\"{pcol}\") as {pcol}_type".
- format(pcol=pcol) for pcol in pcols)
- pcol_types = plpy.execute("SELECT {0} FROM {1} LIMIT 1".
- format(types_str, source_table))
-
# Create the empty dictionary table
- dict_str = ', '.join(" {pcol} {pcol_type} ".
- format(pcol=pcol, pcol_type=pcol_types[0][pcol+"_type"])
- for pcol in pcols)
+ pcol_names_types = ', '.join(" {pcol} {pcol_type} ".
+ format(pcol=pcol,
+ pcol_type=pcol_types[pcol])
+ for pcol in pcols)
plpy.execute("""
CREATE TABLE {out_dict} (
- __pivot_cid__ VARCHAR, pval VARCHAR,
- agg VARCHAR, {dict_str}, col_name VARCHAR)
- """.format(**locals()))
-
- # The holder for rows to insert into output dictionary
- insert_str = []
+ __pivot_cid__ VARCHAR,
+ pval VARCHAR,
+ agg VARCHAR,
+ {pcol_names_types},
+ col_name VARCHAR)
+ """.format(out_dict=out_dict, pcol_names_types=pcol_names_types))
+
+ # List of rows to insert into output dictionary
+ dict_insert_str = []
# Counter for the new output column names
- dict_counter = 0
+ dict_counter = 1
- pivot_str_sel_list = []
- pivot_str_from_list = []
- # Prepare the wrapper for fill value
- if fill_value is not None:
- fill_str_begin = " COALESCE("
- fill_str_end = ", " + fill_value + " ) "
- else:
- fill_str_begin, fill_str_end = "", ""
+ pivot_sel_list = []
+ pivot_from_list = []
for pval in pvals:
agg_func = agg_dict[pval]
for agg in agg_func:
+
+ # is using array_output, create a new array for each pval-agg combo
+ if is_array_output:
+ # we store information in the dictionary table for each
+ # index in the array. 'index_counter' is the current index
+ # being updated (resets for each new array)
+ index_counter = 1
+
+ sub_pivot_sel_list = []
for comb in pivot_comb:
pivot_col_condition = []
- pivot_col_name = ["\"{pval}_{agg}".format(pval=pval, agg=agg)]
+ # note column name starts with double quotes
+ pivot_col_name = ['{pval}_{agg}'.format(pval=pval, agg=agg)]
if output_col_dictionary:
# Prepare the entry for the dictionary
- insert_str.append("(\'__p_{dict_counter}__\', \'{pval}\', "
- "\'{agg}\' ".format(dict_counter=dict_counter,
- pval=pval, agg=agg))
+ if not is_array_output:
+ index_name = ("__p_{dict_counter}__".
+ format(dict_counter=dict_counter))
+ else:
+ # for arrays, index_name is just the index into each array
+ index_name = str(index_counter)
+ index_counter += 1
+ dict_insert_str.append(
+ "(\'{index_name}\', \'{pval}\', \'{agg}\' ".
+ format(index_name=index_name, pval=pval, agg=agg))
# For every pivot column in a given combination
for counter, pcol in enumerate(pcols):
+ if comb[counter] is None:
+ quoted_pcol_value = "NULL"
+ elif pcol_types[pcol] in ("text", "varchar", "character varying"):
+ quoted_pcol_value = "'" + comb[counter] + "'"
+ else:
+ quoted_pcol_value = comb[counter]
+
# If we encounter a NULL value that means it is not filtered
# because of keep_null. Use "IS NULL" for comparison
if comb[counter] is None:
pivot_col_condition.append(" \"{0}\" IS NULL".format(pcol))
pivot_col_name.append("_{0}_null".format(pcol))
else:
- pivot_col_condition.append(" \"{0}\" = '{1}'".
- format(pcol, comb[counter]))
+ pivot_col_condition.append(" \"{0}\" = {1}".
+ format(pcol, quoted_pcol_value))
pivot_col_name.append("_{0}_{1}".format(pcol, comb[counter]))
- # Collect pcol values for the dict
if output_col_dictionary:
- insert_str.append("{0}".format(
- comb[counter] if comb[counter] is not None else "NULL"))
- pivot_col_name.append("\"")
+ dict_insert_str.append("{0}".format(quoted_pcol_value))
if output_col_dictionary:
- # Store the whole string in case some user wants it
- insert_str.append("\'{column_name}\')".
- format(column_name=''.join(pivot_col_name)))
- pivot_col_name = ["__p_"+str(dict_counter)+"__"]
+ # Store the whole string as additional info
+ dict_insert_str.append("'{0}')".format(''.join(pivot_col_name)))
+ pivot_col_name = ["__p_" + str(dict_counter) + "__"]
dict_counter += 1
+
# Collecting the whole sql query
# Please refer to the earlier comment for a sample output
-
# Build the pivot column with NULL values in tuples that don't
# satisfy that column's condition
- pivot_str_from = ("(CASE WHEN {condition} THEN {pval} END) "
- "AS {pivot_col_name}".
- format(pval=pval,
- condition=' AND '.join(pivot_col_condition),
- pivot_col_name=''.join(pivot_col_name)))
- pivot_str_from_list.append(pivot_str_from)
- # Aggregate over each pivot column, while filtering all NULL values
- # created by previous query.
- pivot_str_sel = ("{fill_str_begin}"
- " {agg} ({pivot_col_name}) "
- " FILTER (WHERE {pivot_col_name} IS NOT NULL) "
- "{fill_str_end} AS {pivot_col_name}".
- format(agg=agg, fill_str_begin=fill_str_begin,
- fill_str_end=fill_str_end,
- pivot_col_name=''.join(pivot_col_name)))
- pivot_str_sel_list.append(pivot_str_sel)
+ p_name = '"{0}"'.format(''.join(pivot_col_name))
+ pivot_str_from = (
+ "(CASE WHEN {condition} THEN {pval} END) AS {p_name}".
+ format(pval=pval,
+ condition=' AND '.join(pivot_col_condition),
+ p_name=p_name))
+ pivot_from_list.append(pivot_str_from)
+
+ # Aggregate over each pivot column, while filtering all NULL
+ # values created by previous query.
+ sub_pivot_str_sel = _fill_value_wrapper(
+ "{agg}({p_name}) "
+ " FILTER (WHERE {p_name} IS NOT NULL)".
+ format(agg=agg, p_name=p_name))
+ if not is_array_output:
+ # keep spaces around the 'AS'
+ sub_pivot_str_sel += " AS " + p_name
+ sub_pivot_sel_list.append(sub_pivot_str_sel)
+
+ if sub_pivot_sel_list:
+ if is_array_output:
+ if output_type is 'svec':
+ cast_str = '::FLOAT8[]::{0}.svec'.format(schema_madlib)
+ else:
+ cast_str = '::FLOAT8[]'
+ pivot_sel_list.append(
+ 'ARRAY[{all_pivot_sel}]{cast_str} AS "{pval}_{agg}"'.
+ format(all_pivot_sel=', '.join(sub_pivot_sel_list),
+ cast_str=cast_str,
+ pval=pval,
+ agg=agg))
+ else:
+ pivot_sel_list += sub_pivot_sel_list
try:
plpy.execute("""
CREATE TABLE {out_table} AS
SELECT {index},
- {pivot_str_sel_list}
+ {all_pivot_sel_str}
FROM (
SELECT {index},
- {pivot_str_from_list}
+ {all_pivot_from_str}
FROM {source_table}
) x
GROUP BY {index}
""".format(out_table=out_table,
index=index,
source_table=source_table,
- pivot_str_from_list=', '.join(pivot_str_from_list),
- pivot_str_sel_list=', '.join(pivot_str_sel_list)))
+ all_pivot_from_str=', '.join(pivot_from_list),
+ all_pivot_sel_str=', '.join(pivot_sel_list)
+ ))
if output_col_dictionary:
plpy.execute("INSERT INTO {out_dict} VALUES {insert_sql}".
format(out_dict=out_dict,
- insert_sql=', '.join(insert_str)))
+ insert_sql=', '.join(dict_insert_str)))
except plpy.SPIError:
# Warn user if the number of columns is over the limit
with MinWarning("warning"):
@@ -314,16 +381,16 @@ def parse_aggregates(pvals, aggregate_func):
5) A partial mapping (eg. 'val2=sum'): Use the default ('avg') for the
missing value columns
"""
- param_types = dict.fromkeys(pvals, list)
+ param_types = dict.fromkeys(pvals, tuple)
agg_dict = extract_keyvalue_params(aggregate_func, param_types)
if not agg_dict:
- agg_list = split_quoted_delimited_str(aggregate_func)
- agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ['avg']))
+ agg_list = tuple(split_quoted_delimited_str(aggregate_func))
+ agg_dict = dict.fromkeys(pvals, (agg_list if agg_list else ('avg', )))
else:
for pval in pvals:
if pval not in agg_dict:
- agg_dict[pval] = ['avg']
+ agg_dict[pval] = ('avg', )
return agg_dict
# ------------------------------------------------------------------------------
@@ -364,6 +431,26 @@ def validate_pivot_coding(source_table, out_table, indices, pivs, vals):
# ------------------------------------------------------------------------------
+def validate_output_types(source_table, agg_dict, is_array_output):
+ """
+ Args:
+ @param source_table: str, Name of table containing data
+ @param agg_dict: dict, Key-value pair containing aggregates applied for each val column
+ @param is_array_output: bool, Is the pivot output columnar (False) or array (True)
+
+ Returns:
+ None
+ """
+ for val, func_iterable in agg_dict.items():
+ for func in func_iterable:
+ func_call_str = '{0}({1})'.format(func, val)
+ _assert(not ('[]' in get_expr_type(func_call_str, source_table) and
+ is_array_output),
+ "Pivot: Aggregate {0} with an array return type cannot be "
+ "combined with output_type='array' or 'svec'".format(func))
+# ----------------------------------------------------------------------
+
+
def pivot_help(schema_madlib, message, **kwargs):
"""
Help function for pivot
@@ -401,14 +488,19 @@ For more details on function usage:
-- of the output pivot table
pivot_cols, -- Comma-separated columns that will form the
-- columns of the output pivot table
- pivot_values -- Comma-separated columns that contain the values
+ pivot_values, -- Comma-separated columns that contain the values
-- to be summarized in the output pivot table
- fill_value -- If specified, determines how to fill NULL values
+ fill_value, -- If specified, determines how to fill NULL values
-- resulting from pivot operation
- keep_null -- The flag for determining how to handle NULL
+ keep_null, -- The flag for determining how to handle NULL
-- values in pivot columns
- output_col_dictionary -- The flag for enabling the creation of the
+ output_col_dictionary, -- The flag for enabling the creation of the
-- output dictionary for shorter column names
+ output_type -- This parameter controls the output format
+ -- of the pivoted variables.
+ -- If 'column', a column is created for each pivot
+ -- If 'array', an array is created combining all pivots
+ -- If 'svec', the array is cast to madlib.svec
);
-----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/pivot.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/pivot.sql_in b/src/ports/postgres/modules/utilities/pivot.sql_in
index cb2c223..7cdfbe0 100644
--- a/src/ports/postgres/modules/utilities/pivot.sql_in
+++ b/src/ports/postgres/modules/utilities/pivot.sql_in
@@ -59,7 +59,8 @@ pivot(
aggregate_func,
fill_value,
keep_null,
- output_col_dictionary
+ output_col_dictionary,
+ output_type
)
</pre>
\b Arguments
@@ -67,6 +68,7 @@ pivot(
<dt>source_table</dt>
<dd>VARCHAR. Name of the source table (or view) containing data to
pivot.</dd>
+
<dt>output_table</dt>
<dd>VARCHAR. Name of output table that contains the pivoted data.
The output table contains all the columns present in
@@ -81,18 +83,21 @@ pivot(
- aggregate function
- name of the pivot column <em>'pivot_cols'</em>
- values in the pivot column
-
</dd>
+
<dt>index </dt>
<dd>VARCHAR. Comma-separated columns that will form the index of the output
pivot table. By index we mean the values to group by; these are the rows
in the output pivot table.</dd>
+
<dt>pivot_cols </dt>
<dd>VARCHAR. Comma-separated columns that will form the columns of the
output pivot table.</dd>
+
<dt>pivot_values </dt>
<dd>VARCHAR. Comma-separated columns that contain the values to be
summarized in the output pivot table.</dd>
+
<dt>aggregate_func (optional)</dt>
<dd>VARCHAR. default: 'AVG'. A comma-separated list of aggregates to be
applied to values. These can be PostgreSQL built-in aggregates [1] or UDAs. It is
@@ -113,10 +118,12 @@ pivot(
values resulting from pivot operation. This is a global parameter (not
applied per aggregate) and is applied post-aggregation to the output
table.</dd>
+
<dt>keep_null (optional)</dt>
<dd>BOOLEAN. default: FALSE. If TRUE, then pivot columns are created
corresponding to NULL categories. If FALSE, then no pivot columns will be
created for NULL categories.</dd>
+
<dt>output_col_dictionary (optional)</dt>
<dd>BOOLEAN. default: FALSE. This parameter is used to handle
auto-generated column names that exceed the PostgreSQL limit of 63 bytes
@@ -127,6 +134,19 @@ pivot(
a dictionary output file will be created and a message given to the user.
</dd>
+ <dt>output_type (optional)</dt>
+ <dd>VARCHAR. default: 'column'. This parameter controls the output format
+ of the pivoted variables. If 'column', a column is created for each pivot
+ variable. PostgreSQL limits the number of columns in a table
+ (250 - 1600 depending on column types).
+ If the total number of output columns exceeds this limit, then make this
+ parameter either 'array' (to combine the output columns into an array) or
+ 'svec' (to cast the array output to <em>'madlib.svec'</em> type).
+
+ A dictionary will be created (<em>output_col_dictionary=TRUE</em>)
+ when 'output_type' is 'array' or 'svec' to define each index into the array.
+ </dd>
+
</dl>
@anchor notes
@@ -138,8 +158,8 @@ allowed so NULLs are ignored.
- It is not allowed to set the fill_value parameter without setting the
aggregate_func parameter due to possible ambiguity. Set
aggregate_func to NULL for the default behavior and use fill_value as desired.
-Please note that full_value must be of the same type as the output of the
-aggregate_func (or capable of being cast to the same type by PostgreSQL),
+Please note that full_value must be of the same type as the output of the
+aggregate_func (or capable of being cast to the same type by PostgreSQL),
or else an error will result.
- It is not allowed to set the output_col_dictionary parameter without setting
the keep_null parameter due to possible ambiguity. Set
@@ -303,13 +323,9 @@ SELECT * FROM pivout ORDER BY id,id2;
| 0 | 8 | |
</pre>
--# Turn on the extended view for readability:
+-# Use multiple pivot columns with columnar output:
<pre class="example">
\\x on
-</pre>
-
--# Use multiple pivot columns:
-<pre class="example">
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
SELECT * FROM pivout ORDER BY id;
@@ -346,10 +362,47 @@ val_avg_piv_30_piv2_300 |
...
</pre>
+-# Use multiple pivot columns (same as above) with an array output:
+<pre class="example">
+DROP TABLE IF EXISTS pivout;
+SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val',
+ NULL, NULL, FALSE, FALSE, 'array');
+\\x off
+SELECT * FROM pivout ORDER BY id;
+</pre>
+<pre class="result">
+ id | val_avg
+--------+------------------------------------------------------------
+ 0 | {1,2,NULL,NULL,NULL,3,NULL,NULL,NULL,NULL,NULL,NULL}
+ 1 | {NULL,NULL,7,NULL,NULL,4,NULL,NULL,NULL,NULL,5.5,NULL}
+ [NULL] | {NULL,NULL,NULL,8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
+</pre>
+<pre class="example">
+-- Use the dictionary to understand what each index of an array corresponds to
+SELECT * FROM pivout_dictionary;
+</pre>
+<pre class="result">
+ __pivot_cid__ | pval | agg | piv | piv2 | col_name
+---------------+------+-----+-----+------+---------------------------
+ 1 | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0"
+ 2 | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100"
+ 3 | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200"
+ 4 | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300"
+ 5 | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0"
+ 6 | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100"
+ 7 | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200"
+ 8 | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300"
+ 9 | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0"
+ 10 | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100"
+ 11 | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200"
+ 12 | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300"
+</pre>
+
-# Use multiple value columns:
<pre class="example">
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
+\\x on
SELECT * FROM pivout ORDER BY id;
</pre>
<pre class="result">
@@ -377,6 +430,7 @@ val2_avg_piv_30 | 15.5
<pre class="example">
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
+\\x on
SELECT * FROM pivout ORDER BY id;
</pre>
<pre class="result">
@@ -404,6 +458,7 @@ val_sum_piv_30 | 11
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
'val=avg, val2=sum');
+\\x on
SELECT * FROM pivout ORDER BY id;
</pre>
<pre class="result">
@@ -431,6 +486,7 @@ val2_sum_piv_30 | 31
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
'val=avg, val2=[avg,sum]');
+\\x on
SELECT * FROM pivout ORDER BY id;
</pre>
<pre class="result">
@@ -464,6 +520,7 @@ val2_sum_piv_30 | 31
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True);
+\\x on
SELECT * FROM pivout ORDER BY id,id2;
</pre>
<pre class="result">
@@ -492,32 +549,7 @@ val2_avg_piv_null_piv2_200 | 111
val2_avg_piv_null_piv2_300 | 111
val2_avg_piv_10_piv2_0 | 11
val2_avg_piv_10_piv2_100 | 111
-val2_avg_piv_10_piv2_200 | 111
-val2_avg_piv_10_piv2_300 | 111
-val2_avg_piv_20_piv2_0 | 111
-val2_avg_piv_20_piv2_100 | 111
-val2_avg_piv_20_piv2_200 | 111
-val2_avg_piv_20_piv2_300 | 111
-val2_avg_piv_30_piv2_0 | 111
-val2_avg_piv_30_piv2_100 | 111
-val2_avg_piv_30_piv2_200 | 111
-val2_avg_piv_30_piv2_300 | 111
-val2_sum_piv_null_piv2_0 | 111
-val2_sum_piv_null_piv2_100 | 111
-val2_sum_piv_null_piv2_200 | 111
-val2_sum_piv_null_piv2_300 | 111
-val2_sum_piv_10_piv2_0 | 11
-val2_sum_piv_10_piv2_100 | 111
-val2_sum_piv_10_piv2_200 | 111
-val2_sum_piv_10_piv2_300 | 111
-val2_sum_piv_20_piv2_0 | 111
-val2_sum_piv_20_piv2_100 | 111
-val2_sum_piv_20_piv2_200 | 111
-val2_sum_piv_20_piv2_300 | 111
-val2_sum_piv_30_piv2_0 | 111
-val2_sum_piv_30_piv2_100 | 111
-val2_sum_piv_30_piv2_200 | 111
-val2_sum_piv_30_piv2_300 | 111
+...
-[ RECORD 2 ]--------------+-----
id | 0
id2 | 1
@@ -541,34 +573,6 @@ val2_avg_piv_null_piv2_0 | 111
val2_avg_piv_null_piv2_100 | 111
val2_avg_piv_null_piv2_200 | 111
val2_avg_piv_null_piv2_300 | 111
-val2_avg_piv_10_piv2_0 | 111
-val2_avg_piv_10_piv2_100 | 12
-val2_avg_piv_10_piv2_200 | 111
-val2_avg_piv_10_piv2_300 | 111
-val2_avg_piv_20_piv2_0 | 111
-val2_avg_piv_20_piv2_100 | 13
-val2_avg_piv_20_piv2_200 | 111
-val2_avg_piv_20_piv2_300 | 111
-val2_avg_piv_30_piv2_0 | 111
-val2_avg_piv_30_piv2_100 | 111
-val2_avg_piv_30_piv2_200 | 111
-val2_avg_piv_30_piv2_300 | 111
-val2_sum_piv_null_piv2_0 | 111
-val2_sum_piv_null_piv2_100 | 111
-val2_sum_piv_null_piv2_200 | 111
-val2_sum_piv_null_piv2_300 | 111
-val2_sum_piv_10_piv2_0 | 111
-val2_sum_piv_10_piv2_100 | 12
-val2_sum_piv_10_piv2_200 | 111
-val2_sum_piv_10_piv2_300 | 111
-val2_sum_piv_20_piv2_0 | 111
-val2_sum_piv_20_piv2_100 | 13
-val2_sum_piv_20_piv2_200 | 111
-val2_sum_piv_20_piv2_300 | 111
-val2_sum_piv_30_piv2_0 | 111
-val2_sum_piv_30_piv2_100 | 111
-val2_sum_piv_30_piv2_200 | 111
-val2_sum_piv_30_piv2_300 | 111
...
</pre>
@@ -577,74 +581,49 @@ val2_sum_piv_30_piv2_300 | 111
DROP TABLE IF EXISTS pivout, pivout_dictionary;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True, True);
-SELECT * FROM pivout_dictionary;
+\\x off
+SELECT * FROM pivout_dictionary order by __pivot_cid__;
</pre>
<pre class="result">
- __pivot_cid__ | pval | agg | piv | piv2 | col_name
----------------+------+-----+-----+------+------------------------------
- __p_1__ | val | avg | | 100 | "val_avg_piv_null_piv2_100"
- __p_5__ | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100"
- __p_9__ | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100"
- __p_12__ | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0"
- __p_16__ | val2 | avg | | 0 | "val2_avg_piv_null_piv2_0"
- __p_23__ | val2 | avg | 10 | 300 | "val2_avg_piv_10_piv2_300"
- __p_27__ | val2 | avg | 20 | 300 | "val2_avg_piv_20_piv2_300"
- __p_30__ | val2 | avg | 30 | 200 | "val2_avg_piv_30_piv2_200"
- __p_34__ | val2 | sum | | 200 | "val2_sum_piv_null_piv2_200"
- __p_38__ | val2 | sum | 10 | 200 | "val2_sum_piv_10_piv2_200"
- __p_41__ | val2 | sum | 20 | 100 | "val2_sum_piv_20_piv2_100"
- __p_45__ | val2 | sum | 30 | 100 | "val2_sum_piv_30_piv2_100"
- __p_2__ | val | avg | | 200 | "val_avg_piv_null_piv2_200"
- __p_6__ | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200"
- __p_11__ | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300"
- __p_15__ | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300"
- __p_19__ | val2 | avg | | 300 | "val2_avg_piv_null_piv2_300"
- __p_20__ | val2 | avg | 10 | 0 | "val2_avg_piv_10_piv2_0"
- __p_24__ | val2 | avg | 20 | 0 | "val2_avg_piv_20_piv2_0"
- __p_28__ | val2 | avg | 30 | 0 | "val2_avg_piv_30_piv2_0"
- __p_33__ | val2 | sum | | 100 | "val2_sum_piv_null_piv2_100"
- __p_37__ | val2 | sum | 10 | 100 | "val2_sum_piv_10_piv2_100"
- __p_42__ | val2 | sum | 20 | 200 | "val2_sum_piv_20_piv2_200"
- __p_46__ | val2 | sum | 30 | 200 | "val2_sum_piv_30_piv2_200"
- __p_3__ | val | avg | | 300 | "val_avg_piv_null_piv2_300"
- __p_7__ | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300"
- __p_10__ | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200"
- __p_14__ | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200"
- __p_18__ | val2 | avg | | 200 | "val2_avg_piv_null_piv2_200"
- __p_21__ | val2 | avg | 10 | 100 | "val2_avg_piv_10_piv2_100"
- __p_25__ | val2 | avg | 20 | 100 | "val2_avg_piv_20_piv2_100"
- __p_29__ | val2 | avg | 30 | 100 | "val2_avg_piv_30_piv2_100"
- __p_32__ | val2 | sum | | 0 | "val2_sum_piv_null_piv2_0"
- __p_36__ | val2 | sum | 10 | 0 | "val2_sum_piv_10_piv2_0"
- __p_43__ | val2 | sum | 20 | 300 | "val2_sum_piv_20_piv2_300"
- __p_47__ | val2 | sum | 30 | 300 | "val2_sum_piv_30_piv2_300"
- __p_0__ | val | avg | | 0 | "val_avg_piv_null_piv2_0"
- __p_4__ | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0"
- __p_8__ | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0"
- __p_13__ | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100"
- __p_17__ | val2 | avg | | 100 | "val2_avg_piv_null_piv2_100"
- __p_22__ | val2 | avg | 10 | 200 | "val2_avg_piv_10_piv2_200"
- __p_26__ | val2 | avg | 20 | 200 | "val2_avg_piv_20_piv2_200"
- __p_31__ | val2 | avg | 30 | 300 | "val2_avg_piv_30_piv2_300"
- __p_35__ | val2 | sum | | 300 | "val2_sum_piv_null_piv2_300"
- __p_39__ | val2 | sum | 10 | 300 | "val2_sum_piv_10_piv2_300"
- __p_40__ | val2 | sum | 20 | 0 | "val2_sum_piv_20_piv2_0"
- __p_44__ | val2 | sum | 30 | 0 | "val2_sum_piv_30_piv2_0"
+__pivot_cid__ | pval | agg | piv | piv2 | col_name
+---------------+------+-----+--------+------+------------------------------
+ __p_1__ | val | avg | [NULL] | 0 | "val_avg_piv_null_piv2_0"
+ __p_2__ | val | avg | [NULL] | 100 | "val_avg_piv_null_piv2_100"
+ __p_3__ | val | avg | [NULL] | 200 | "val_avg_piv_null_piv2_200"
+ __p_4__ | val | avg | [NULL] | 300 | "val_avg_piv_null_piv2_300"
+ __p_5__ | val | avg | 10 | 0 | "val_avg_piv_10_piv2_0"
+ __p_6__ | val | avg | 10 | 100 | "val_avg_piv_10_piv2_100"
+ __p_7__ | val | avg | 10 | 200 | "val_avg_piv_10_piv2_200"
+ __p_8__ | val | avg | 10 | 300 | "val_avg_piv_10_piv2_300"
+ __p_9__ | val | avg | 20 | 0 | "val_avg_piv_20_piv2_0"
+ __p_10__ | val | avg | 20 | 100 | "val_avg_piv_20_piv2_100"
+ __p_11__ | val | avg | 20 | 200 | "val_avg_piv_20_piv2_200"
+ __p_12__ | val | avg | 20 | 300 | "val_avg_piv_20_piv2_300"
+ __p_13__ | val | avg | 30 | 0 | "val_avg_piv_30_piv2_0"
+ __p_14__ | val | avg | 30 | 100 | "val_avg_piv_30_piv2_100"
+ __p_15__ | val | avg | 30 | 200 | "val_avg_piv_30_piv2_200"
+ __p_16__ | val | avg | 30 | 300 | "val_avg_piv_30_piv2_300"
+ __p_17__ | val2 | avg | [NULL] | 0 | "val2_avg_piv_null_piv2_0"
+ __p_18__ | val2 | avg | [NULL] | 100 | "val2_avg_piv_null_piv2_100"
+ __p_19__ | val2 | avg | [NULL] | 200 | "val2_avg_piv_null_piv2_200"
+ __p_20__ | val2 | avg | [NULL] | 300 | "val2_avg_piv_null_piv2_300"
+ __p_21__ | val2 | avg | 10 | 0 | "val2_avg_piv_10_piv2_0"
+...
(48 rows)
</pre>
<pre class="example">
+\\x on
SELECT * FROM pivout ORDER BY id,id2;
</pre>
<pre class="result">
--[ RECORD 1 ]--
+-[ RECORD 1 ]----
id | 0
id2 | 0
-__p_0__ | 111
__p_1__ | 111
__p_2__ | 111
__p_3__ | 111
-__p_4__ | 1
-__p_5__ | 111
+__p_4__ | 111
+__p_5__ | 1
__p_6__ | 111
__p_7__ | 111
__p_8__ | 111
@@ -653,91 +632,40 @@ __p_10__ | 111
__p_11__ | 111
__p_12__ | 111
__p_13__ | 111
-__p_14__ | 111
-__p_15__ | 111
-__p_16__ | 111
-__p_17__ | 111
-__p_18__ | 111
-__p_19__ | 111
-__p_20__ | 11
-__p_21__ | 111
-__p_22__ | 111
-__p_23__ | 111
-__p_24__ | 111
-__p_25__ | 111
-__p_26__ | 111
-__p_27__ | 111
-__p_28__ | 111
-__p_29__ | 111
-__p_30__ | 111
-__p_31__ | 111
-__p_32__ | 111
-__p_33__ | 111
-__p_34__ | 111
-__p_35__ | 111
-__p_36__ | 11
-__p_37__ | 111
-__p_38__ | 111
-__p_39__ | 111
-__p_40__ | 111
-__p_41__ | 111
-__p_42__ | 111
-__p_43__ | 111
-__p_44__ | 111
-__p_45__ | 111
-__p_46__ | 111
-__p_47__ | 111
--[ RECORD 2 ]--
+...
+-[ RECORD 2 ]----
id | 0
id2 | 1
-__p_0__ | 111
__p_1__ | 111
__p_2__ | 111
__p_3__ | 111
__p_4__ | 111
-__p_5__ | 2
+__p_5__ | 111
+__p_6__ | 2
+__p_7__ | 111
+__p_8__ | 111
+__p_9__ | 111
+__p_10__ | 3
+__p_11__ | 111
+__p_12__ | 111
+__p_13__ | 111
+...
+-[ RECORD 3 ]----
+id | 1
+id2 | 0
+__p_1__ | 111
+__p_2__ | 111
+__p_3__ | 111
+__p_4__ | 111
+__p_5__ | 111
__p_6__ | 111
__p_7__ | 111
__p_8__ | 111
-__p_9__ | 3
+__p_9__ | 111
__p_10__ | 111
__p_11__ | 111
__p_12__ | 111
__p_13__ | 111
-__p_14__ | 111
-__p_15__ | 111
-__p_16__ | 111
-__p_17__ | 111
-__p_18__ | 111
-__p_19__ | 111
-__p_20__ | 111
-__p_21__ | 12
-__p_22__ | 111
-__p_23__ | 111
-__p_24__ | 111
-__p_25__ | 13
-__p_26__ | 111
-__p_27__ | 111
-__p_28__ | 111
-__p_29__ | 111
-__p_30__ | 111
-__p_31__ | 111
-__p_32__ | 111
-__p_33__ | 111
-__p_34__ | 111
-__p_35__ | 111
-__p_36__ | 111
-__p_37__ | 12
-__p_38__ | 111
-__p_39__ | 111
-__p_40__ | 111
-__p_41__ | 13
-__p_42__ | 111
-__p_43__ | 111
-__p_44__ | 111
-__p_45__ | 111
-__p_46__ | 111
-__p_47__ | 111
...
</pre>
@@ -786,7 +714,8 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
aggregate_func TEXT,
fill_value TEXT,
keep_null BOOLEAN,
- output_col_dictionary BOOLEAN
+ output_col_dictionary BOOLEAN,
+ output_type TEXT
) RETURNS VOID AS $$
PythonFunction(utilities, pivot, pivot)
@@ -794,6 +723,22 @@ $$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
+ source_table TEXT,
+ out_table TEXT,
+ index TEXT,
+ pivot_cols TEXT,
+ pivot_values TEXT,
+ aggregate_func TEXT,
+ fill_value TEXT,
+ keep_null BOOLEAN,
+ output_col_dictionary BOOLEAN
+
+) RETURNS VOID AS $$
+ SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
source_table TEXT,
out_table TEXT,
index TEXT,
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/test/pivot.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/test/pivot.sql_in b/src/ports/postgres/modules/utilities/test/pivot.sql_in
index 79bcc57..3dafd18 100644
--- a/src/ports/postgres/modules/utilities/test/pivot.sql_in
+++ b/src/ports/postgres/modules/utilities/test/pivot.sql_in
@@ -28,16 +28,16 @@ CREATE TABLE pivset(
);
INSERT INTO pivset VALUES
- (0, 10, 1),
- (0, 10, 2),
- (0, 20, 3),
- (1, 20, 4),
- (1, 30, 5),
- (1, 30, 6),
- (1, 10, 7),
- (NULL, 10, 8),
- (0, NULL, 9),
- (0, 10, NULL);
+ (0, 10, 1),
+ (0, 10, 2),
+ (0, 20, 3),
+ (1, 20, 4),
+ (1, 30, 5),
+ (1, 30, 6),
+ (1, 10, 7),
+ (NULL, 10, 8),
+ (0, NULL, 9),
+ (0, 10, NULL);
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset', 'pivout', 'id', 'piv', 'val');
@@ -47,12 +47,12 @@ SELECT assert(val_avg_piv_20 = 3, 'Wrong output in pivoting') FROM pivout WHERE
DROP VIEW IF EXISTS pivset_ext;
CREATE VIEW pivset_ext AS
- SELECT *,
+ SELECT *,
COALESCE(id + (pivset.val / 3), 0) AS id2,
COALESCE(piv + (pivset.val / 3), 0) AS piv2,
COALESCE(val + 10, 0) AS val2
FROM pivset;
-SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext
+SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext
ORDER BY id,id2,piv,piv2,val,val2;
DROP TABLE IF EXISTS pivout;
@@ -60,87 +60,87 @@ SELECT pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 'val');
SELECT * FROM pivout;
SELECT assert(val_avg_piv_10 = 1.5,
- 'Wrong output in pivoting: index columns') FROM pivout
- WHERE id = 0 AND id2 = 0;
+ 'Wrong output in pivoting: index columns') FROM pivout
+ WHERE id = 0 AND id2 = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
SELECT * FROM pivout;
SELECT assert(val_avg_piv_10_piv2_10 = 1.5,
- 'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: pivot columns') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
SELECT * FROM pivout;
SELECT assert(val2_avg_piv_20 = 13,
- 'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: value columns') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum');
SELECT * FROM pivout;
SELECT assert(val_sum_piv_10 = 3,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True);
SELECT * FROM pivout;
SELECT assert(val_sum_piv_null = 9,
- 'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: keep null') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111');
SELECT * FROM pivout;
SELECT assert(val_sum_piv_30 = 111,
- 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111', True);
SELECT * FROM pivout;
SELECT assert(val_sum_piv_30 = 111 AND val_sum_piv_null = 9,
- 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: fill value') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
SELECT * FROM pivout;
SELECT assert(val_avg_piv_10 = 1.5 AND val_sum_piv_10 = 3,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val', 'avg, sum');
SELECT * FROM pivout;
SELECT assert(val_avg_piv_10_piv2_10 = 1.5 AND val_sum_piv_10_piv2_10 = 3,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2', 'avg, sum');
SELECT * FROM pivout;
SELECT assert(val_sum_piv_10 = 3 AND val2_avg_piv_20 = 13,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
- 'val=avg, val2=sum');
+ 'val=avg, val2=sum');
SELECT * FROM pivout;
SELECT assert(val_avg_piv_10 = 1.5 AND val2_sum_piv_10 = 23,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
- 'val=avg, val2=[avg,sum]');
+ 'val=avg, val2=[avg,sum]');
SELECT * FROM pivout;
SELECT assert(val2_avg_piv_20 = 13 AND val2_sum_piv_10 = 23,
- 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
+ 'Wrong output in pivoting: aggregate functions') FROM pivout WHERE id = 0;
DROP TABLE IF EXISTS pivout;
DROP TABLE IF EXISTS pivout_dictionary;
@@ -148,9 +148,9 @@ SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True, True);
SELECT * FROM pivout;
-SELECT assert(__p_7__ = 1.5,
- 'Wrong output in pivoting: Output dictionary') FROM pivout
- WHERE id = 0 AND id2 = 0;
+SELECT assert(__p_8__ = 1.5,
+ 'Wrong output in pivoting: Output dictionary') FROM pivout
+ WHERE id = 0 AND id2 = 0;
DROP FUNCTION IF EXISTS array_add1(ANYARRAY, ANYELEMENT);
DROP AGGREGATE IF EXISTS array_accum1 (anyelement);
@@ -167,4 +167,18 @@ DROP TABLE IF EXISTS pivout;
SELECT pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1');
SELECT * FROM pivout;
+DROP TABLE IF EXISTS pivout;
+DROP TABLE IF EXISTS pivout_dictionary;
+SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
+ 'val=avg, val2=[avg,sum]', '111', True, True, 'a');
+SELECT * FROM pivout;
+SELECT * FROM pivout_dictionary;
+
+DROP TABLE IF EXISTS pivout;
+DROP TABLE IF EXISTS pivout_dictionary;
+SELECT pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
+ 'val=avg, val2=[avg,sum]', '111', True, True, 's');
+SELECT * FROM pivout;
+SELECT * FROM pivout_dictionary;
+
DROP VIEW IF EXISTS pivset_ext;
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/utilities.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in
index 0e47aea..126f4e6 100644
--- a/src/ports/postgres/modules/utilities/utilities.py_in
+++ b/src/ports/postgres/modules/utilities/utilities.py_in
@@ -1,4 +1,5 @@
+import collections
import re
import time
import random
@@ -583,16 +584,16 @@ def extract_keyvalue_params(input_params,
else:
continue
try:
- if param_type in (int, str, float):
- parameter_dict[param_name] = param_type(param_value)
- elif param_type == list:
- parameter_dict[param_name] = split_quoted_delimited_str(
- param_value.strip('[](){} '))
- elif param_type == bool:
+ if param_type == bool: # bool is not subclassable
# True values are y, yes, t, true, on and 1;
# False values are n, no, f, false, off and 0.
# Raises ValueError if anything else.
parameter_dict[param_name] = bool(strtobool(param_value))
+ elif param_type in (int, str, float):
+ parameter_dict[param_name] = param_type(param_value)
+ elif issubclass(param_type, collections.Iterable):
+ parameter_dict[param_name] = split_quoted_delimited_str(
+ param_value.strip('[](){} '))
else:
raise TypeError("Invalid input: {0} has unsupported type "
"{1}".format(param_name, usage_str))
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/bb209bbb/src/ports/postgres/modules/utilities/validate_args.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/validate_args.py_in b/src/ports/postgres/modules/utilities/validate_args.py_in
index 91f34b8..5832124 100644
--- a/src/ports/postgres/modules/utilities/validate_args.py_in
+++ b/src/ports/postgres/modules/utilities/validate_args.py_in
@@ -345,8 +345,9 @@ def get_cols_and_types(tbl):
def get_expr_type(expr, tbl):
- """ Temporary function to obtain the type of an expression
+ """ Return the type of an expression run on a given table
+ Note: this
Args:
@param expr
@@ -356,7 +357,6 @@ def get_expr_type(expr, tbl):
expr_type = plpy.execute("""
SELECT pg_typeof({0}) AS type
FROM {1}
- WHERE ({0}) IS NOT NULL
LIMIT 1
""".format(expr, tbl))[0]['type']
return expr_type.upper()