You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2016/01/20 22:23:42 UTC
[1/2] incubator-madlib git commit: Minor: Update return value in
correlation + matrix
Repository: incubator-madlib
Updated Branches:
refs/heads/master eec20a4bc -> d282e0f36
Minor: Update return value in correlation + matrix
- In correlation, a special UDT was returned containing run time stats.
This has been replaced by a TEXT that contains required information.
- In matrix, the decomposition operations did not provide enough
information about how to use suffixes to query the result tables. This
has now been added.
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/83391ace
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/83391ace
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/83391ace
Branch: refs/heads/master
Commit: 83391ace1d8d8524b774a76a1328ce4967127031
Parents: eec20a4
Author: Rahul Iyer <ri...@pivotal.io>
Authored: Mon Jan 18 11:39:20 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Wed Jan 20 13:08:02 2016 -0800
----------------------------------------------------------------------
.../postgres/modules/linalg/matrix_ops.sql_in | 33 ++++++++++++++++----
.../postgres/modules/stats/correlation.py_in | 31 +++++++++---------
.../postgres/modules/stats/correlation.sql_in | 26 +++++----------
3 files changed, 51 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/83391ace/src/ports/postgres/modules/linalg/matrix_ops.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/linalg/matrix_ops.sql_in b/src/ports/postgres/modules/linalg/matrix_ops.sql_in
index 46a728d..bddecc1 100644
--- a/src/ports/postgres/modules/linalg/matrix_ops.sql_in
+++ b/src/ports/postgres/modules/linalg/matrix_ops.sql_in
@@ -2599,8 +2599,16 @@ RETURNS MADLIB_SCHEMA.matrix_result AS $$
PythonFunctionBodyOnly(`linalg', `matrix_ops')
matrix_ops.matrix_lu(schema_madlib, matrix_in, in_args,
matrix_out_prefix, out_args)
- return ["Prefix = {0}, add _p, _l, _u, _q at the end to "
- "get respective matrices".format(matrix_out_prefix)]
+ result = """
+ For LU decomposition with full pivoting (PAQ = LU),
+ the corresponding matrices can be obtained by appending following suffixes
+ to {0}:
+ _p for row permutation matrix P
+ _q for column permutation matrix Q
+ _l for lower triangular factor L
+ _u for upper triangular factor U
+ """.format(matrix_out_prefix)
+ return [result]
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -2710,8 +2718,15 @@ RETURNS MADLIB_SCHEMA.matrix_result AS $$
PythonFunctionBodyOnly(`linalg', `matrix_ops')
matrix_ops.matrix_cholesky(schema_madlib, matrix_in, in_args,
matrix_out_prefix, out_args)
- return ["Prefix = {0}, add _p, _l, _d at the end of prefix "
- "get the P, L and D matrices respectively".format(matrix_out_prefix)]
+ result = """
+ For Cholesky decomposition with left pivoting (PA = LDL*),
+ the corresponding matrices can be obtained by appending following suffixes
+ to {0}:
+ _p for row permutation matrix P
+ _l for lower triangular factor L
+ _d for diagonal matrix D
+ """.format(matrix_out_prefix)
+ return [result]
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -2766,8 +2781,14 @@ RETURNS MADLIB_SCHEMA.matrix_result AS $$
PythonFunctionBodyOnly(`linalg', `matrix_ops')
matrix_ops.matrix_qr(schema_madlib, matrix_in, in_args,
matrix_out_prefix, out_args)
- return ["Prefix = {0}, add _q, _r at the end of prefix "
- "get the Q and R matrices respectively".format(matrix_out_prefix)]
+ result = """
+ For QR decomposition (A = QR),
+ the corresponding matrices can be obtained by appending following suffixes
+ to {0}:
+ _q for orthogonal matrix Q
+ _r for upper triangular factor R
+ """.format(matrix_out_prefix)
+ return [result]
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/83391ace/src/ports/postgres/modules/stats/correlation.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/correlation.py_in b/src/ports/postgres/modules/stats/correlation.py_in
index 83604f4..fc9959b 100644
--- a/src/ports/postgres/modules/stats/correlation.py_in
+++ b/src/ports/postgres/modules/stats/correlation.py_in
@@ -67,22 +67,23 @@ def correlation(schema_madlib, source_table, output_table,
if len(_existing_target_cols) == 1:
plpy.error("Correlation error: Only one numeric column found in the target list.")
+ run_time = _populate_output_table(schema_madlib, source_table, output_table,
+ _existing_target_cols, get_cov, verbose)
# ---- Output message ----
- output_text_mesasge = "Summary for 'correlation' function"
+ output_text_list = ["Summary for 'correlation' function"]
+ output_text_list.append("Output table = " + str(output_table))
if _nonnumeric_target_cols:
- output_text_mesasge += ("\n Non-numeric columns ignored: {0}".
- format(str(_nonnumeric_target_cols)))
+ output_text_list.append(("Non-numeric columns ignored: {0}".
+ format(str(_nonnumeric_target_cols))))
if _nonexisting_target_cols:
- output_text_mesasge += ("\n Columns that don't exist in '{0}' ignored: {1}".
- format(source_table, str(_nonexisting_target_cols)))
+ output_text_list.append(("Columns that don't exist in '{0}' ignored: {1}".
+ format(source_table, str(_nonexisting_target_cols))))
- output_text_mesasge += ("\n Producing correlation for columns: {0}".
- format(str(_existing_target_cols)))
- plpy.notice(output_text_mesasge)
+ output_text_list.append(("Producing correlation for columns: {0}".
+ format(str(_existing_target_cols))))
+ output_text_list.append("Total run time = " + str(run_time))
# ---- Output message ----
-
- return _populate_output_table(schema_madlib, source_table, output_table,
- _existing_target_cols, get_cov, verbose)
+ return '\n'.join(output_text_list)
# ------------------------------------------------------------------------------
@@ -255,7 +256,7 @@ def _populate_output_table(schema_madlib, source_table, output_table,
# clean up and return
plpy.execute("DROP TABLE {temp_table}".format(**locals()))
end = time()
- return (output_table, len(col_names), end - start)
+ return end - start
# ------------------------------------------------------------------------------
@@ -289,9 +290,9 @@ The columns of the table are described as follows:
in 'source_table'.
The output table is arranged as a lower-traingular matrix with the upper
-triangle set to NULL and the diagonal elements set to 1.0. To obtain the
-result from the output_table in this matrix format ensure to order the
-elements using the 'column_position' column.
+triangle set to NULL. To obtain the result from the output_table in this matrix
+format ensure to order the elements using the 'column_position' column.
+
""".format(schema_madlib=schema_madlib, func=func)
elif message is not None and message.lower() in ('example', 'examples'):
return """
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/83391ace/src/ports/postgres/modules/stats/correlation.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/correlation.sql_in b/src/ports/postgres/modules/stats/correlation.sql_in
index 36cb133..3e4e9bd 100644
--- a/src/ports/postgres/modules/stats/correlation.sql_in
+++ b/src/ports/postgres/modules/stats/correlation.sql_in
@@ -276,16 +276,6 @@ CREATE AGGREGATE MADLIB_SCHEMA.covariance_agg(
);
-----------------------------------------------------------------------
-
-DROP TYPE IF EXISTS MADLIB_SCHEMA.correlation_result CASCADE;
-CREATE TYPE MADLIB_SCHEMA.correlation_result AS
-(
- output_table text,
- row_count integer,
- duration double precision
-);
-
------------------------------------------------------------------------
-- Main function for correlation
-----------------------------------------------------------------------
/* @brief Compute a correlation matrix for a table with optional target columns specified
@@ -308,7 +298,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation(
output_table varchar, -- output table name
target_cols varchar, -- comma separated list of output cols (default = '*')
verbose boolean -- flag to determine verbosity
-) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+) RETURNS TEXT AS $$
PythonFunction(stats, correlation, correlation)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -321,7 +311,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation(
output_table varchar, -- output table name
target_cols varchar -- comma separated list of output cols (default = '*')
)
-RETURNS MADLIB_SCHEMA.correlation_result AS $$
+RETURNS TEXT AS $$
select MADLIB_SCHEMA.correlation($1, $2, $3, FALSE)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -330,7 +320,7 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation(
source_table varchar, -- input table name
output_table varchar -- output table name
-) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+) RETURNS TEXT AS $$
select MADLIB_SCHEMA.correlation($1, $2, NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -348,7 +338,7 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-----------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.correlation()
-RETURNS text AS $$
+RETURNS TEXT AS $$
PythonFunctionBodyOnly(`stats', `correlation')
return correlation.correlation_help_message(schema_madlib, None)
$$ LANGUAGE plpythonu IMMUTABLE
@@ -378,7 +368,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
output_table varchar, -- output table name
target_cols varchar, -- comma separated list of output cols (default = '*')
verbose boolean -- flag to determine verbosity
-) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+) RETURNS TEXT AS $$
PythonFunctionBodyOnly(`stats', `correlation')
return correlation.correlation(schema_madlib, source_table, output_table,
target_cols, True, verbose)
@@ -393,7 +383,7 @@ CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
output_table varchar, -- output table name
target_cols varchar -- comma separated list of output cols (default = '*')
)
-RETURNS MADLIB_SCHEMA.correlation_result AS $$
+RETURNS TEXT AS $$
select MADLIB_SCHEMA.covariance($1, $2, $3, FALSE)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -402,7 +392,7 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance(
source_table varchar, -- input table name
output_table varchar -- output table name
-) RETURNS MADLIB_SCHEMA.correlation_result AS $$
+) RETURNS TEXT AS $$
select MADLIB_SCHEMA.covariance($1, $2, NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
@@ -420,7 +410,7 @@ m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-----------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.covariance()
-RETURNS text AS $$
+RETURNS TEXT AS $$
PythonFunctionBodyOnly(`stats', `correlation')
return correlation.correlation_help_message(schema_madlib, None, cov=True)
$$ LANGUAGE plpythonu IMMUTABLE
[2/2] incubator-madlib git commit: Elastic Net: Check only if
features are numeric
Posted by ri...@apache.org.
Elastic Net: Check only if features are numeric
JIRA: MADLIB-952
Columns were being checked to ensure every column is of the same numeric
type. While giving an error for non-numeric types is correct, there is
no need to enforce them to be of same numeric type, as the columns are
cast to float8[] inside the function. The input analyzer has been
changed to relax this condition.
This closes #12.
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/d282e0f3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/d282e0f3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/d282e0f3
Branch: refs/heads/master
Commit: d282e0f362c522e74f310b0148d392db6d63ac7c
Parents: 83391ac
Author: Orhan Kislal <ok...@pivotal.io>
Authored: Wed Jan 20 13:13:08 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Wed Jan 20 13:22:54 2016 -0800
----------------------------------------------------------------------
.../modules/elastic_net/elastic_net.py_in | 6 ++---
.../elastic_net_optimizer_fista.py_in | 2 +-
.../test/elastic_net_install_check.sql_in | 27 ++++++++++++++++++++
3 files changed, 30 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/elastic_net.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net.py_in b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
index 21c581a..22266f9 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
@@ -302,7 +302,6 @@ def elastic_net_train(schema_madlib, tbl_source, tbl_result, col_dep_var,
SELECT {schema_madlib}.elastic_net_train('gaussian');
for supported optimizers.
""".format(schema_madlib=schema_madlib))
-
# handle all special cases of col_ind_var
col_ind_var, outstr_array = analyze_input_str(schema_madlib, tbl_source,
col_ind_var, col_dep_var,
@@ -420,13 +419,12 @@ def analyze_input_str(schema_madlib, tbl_source,
col_ind_var)
else:
included_col_types = [col_types_dict[i] for i in outstr_array]
- if not all(i == included_col_types[0] and is_psql_numeric_type(i)
+ if not all(is_psql_numeric_type(i)
for i in included_col_types):
plpy.error("""
Elastic Net error: All columns to be included in the
- independent variables should be of the same numeric type.
+ independent variables should be of the numeric type.
""")
-
col_ind_var_new = "ARRAY[" + ','.join(outstr_array) + "]"
return (col_ind_var_new, outstr_array)
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in b/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
index a546de4..e3e5977 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net_optimizer_fista.py_in
@@ -422,7 +422,7 @@ def __compute_fista(schema_madlib, func_step_aggregate, func_state_diff,
arguments
@param tbl_state Name of the (temporary) table containing the inter-iteration
states
- @param rel_source Name of the relation containing input points
+ @param tbl_source Name of the relation containing input points
@param col_ind_var Name of the independent variables column
@param col_dep_var Name of the dependent variable column
@param drop_table Boolean, whether to use IterationController (True) or
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/d282e0f3/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
index c45ccd2..643750a 100644
--- a/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
+++ b/src/ports/postgres/modules/elastic_net/test/elastic_net_install_check.sql_in
@@ -514,6 +514,16 @@ COPY lin_housing_wi (x, y) FROM STDIN NULL '?';
{1,0.04741,0.00,11.930,0,0.5730,6.0300,80.80,2.5050,1,273.0,21.00,396.90,7.88} 11.90
\.
+DROP TABLE IF EXISTS elastic_type_src;
+
+CREATE TABLE elastic_type_src ( var_int int, var_float8 float8, var_sint smallint);
+COPY elastic_type_src (var_int, var_float8, var_sint) FROM stdin DELIMITER ',' NULL '?' ;
+1, 1.1, 1
+2, 2.2, 2
+3, 3.3, 3
+4, 4.4, 4
+\.
+
create function check_elastic_net ()
returns void as $$
begin
@@ -614,6 +624,23 @@ begin
'Elastic Net: log-likelihood mismatch (use_active_set = t)!'
) from house_en;
+ execute 'DROP TABLE IF EXISTS elastic_type_res';
+ perform elastic_net_train('elastic_type_src',
+ 'elastic_type_res',
+ 'var_int < 0',
+ '*',
+ 'binomial',
+ 0.6,
+ 0.02,
+ TRUE,
+ NULL,
+ 'fista',
+ '',
+ '',
+ 10000,
+ 1e-6
+ );
+
end;
$$ language plpgsql volatile;