You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2016/03/26 00:49:02 UTC
[3/3] incubator-madlib git commit: SVM: Better NULL handling + use temp for random matrices
SVM: Better NULL handling + use temp for random matrices
Closes #28, closes #30
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/62a99ce6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/62a99ce6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/62a99ce6
Branch: refs/heads/master
Commit: 62a99ce618280c13c0415dcacc72ad48205e1107
Parents: 360f134
Author: Xiaocheng Tang <xi...@gmail.com>
Authored: Fri Mar 11 09:49:42 2016 -0800
Committer: Rahul Iyer <ri...@pivotal.io>
Committed: Fri Mar 25 16:48:32 2016 -0700
----------------------------------------------------------------------
.../postgres/modules/linalg/matrix_ops.py_in | 9 +++++----
.../modules/svm/kernel_approximation.py_in | 19 ++++++++++++-------
2 files changed, 17 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/62a99ce6/src/ports/postgres/modules/linalg/matrix_ops.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/linalg/matrix_ops.py_in b/src/ports/postgres/modules/linalg/matrix_ops.py_in
index d11dcdb..458a763 100644
--- a/src/ports/postgres/modules/linalg/matrix_ops.py_in
+++ b/src/ports/postgres/modules/linalg/matrix_ops.py_in
@@ -3125,8 +3125,8 @@ def matrix_random(schema_madlib, distribution, row_dim, col_dim,
else:
distribution = 'uniform'
- in_args_default = {'seed': randint(0, 1000), 'table_type': ''}
- in_args_types = {'seed': int, 'table_type': str}
+ in_args_default = {'seed': randint(0, 1000), 'temp_out': False}
+ in_args_types = {'seed': int, 'temp_out': bool}
if distribution == 'normal':
in_args_default.update({'mu': 0, 'sigma': 1})
in_args_types.update({'mu': float, 'sigma': float})
@@ -3164,7 +3164,7 @@ def matrix_random(schema_madlib, distribution, row_dim, col_dim,
.format(distribution, ', '.join(sorted(supported_dist))))
plpy.execute("""
- CREATE {in_args_vals[table_type]} TABLE {matrix_out}
+ CREATE {is_temp} TABLE {matrix_out}
m4_ifdef(`__POSTGRESQL__', `',
`WITH (APPENDONLY=TRUE,COMPRESSTYPE=QUICKLZ)') AS
SELECT
@@ -3174,4 +3174,5 @@ def matrix_random(schema_madlib, distribution, row_dim, col_dim,
generate_series(1, {row_dim}) as row
m4_ifdef(`__POSTGRESQL__', `',
`DISTRIBUTED BY ({out_args[row]})')
- """.format(**locals()))
+ """.format(is_temp=True if in_args_vals['temp_out'] else False,
+ **locals()))
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/62a99ce6/src/ports/postgres/modules/svm/kernel_approximation.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/svm/kernel_approximation.py_in b/src/ports/postgres/modules/svm/kernel_approximation.py_in
index 190692e..0a09fbf 100644
--- a/src/ports/postgres/modules/svm/kernel_approximation.py_in
+++ b/src/ports/postgres/modules/svm/kernel_approximation.py_in
@@ -194,7 +194,7 @@ class PolyKernel(object):
drop table if exists {rd_weights};
select {schema_madlib}.matrix_random(
1, {dim},
- 'upper=1, lower=-1, seed={seed}, table_type=temp',
+ 'upper=1, lower=-1, seed={seed}, temp_out=true',
'bernoulli', '{rd_weights}',
'row={id}, val={val}')
""".format(rd_weights=rd_weights_,
@@ -303,6 +303,7 @@ class PolyKernel(object):
{id_col},
{grouping_col}
from {source_table}
+ WHERE not {schema_madlib}.array_contains_null({independent_varname})
) q cross join (select {pro.rd_val} from {pro.weights}) as weights
cross join (select {pro.rd_val} from {pro.coefs}) as coefs
cross join (select {pro.rd_val} from {pro.reps}) as reps
@@ -348,26 +349,28 @@ class GaussianKernelBase(object):
def _random_weights(self, row_dim, col_dim, rd_id, rd_val):
rd_weights = unique_string(desp='random_weights')
sigma = sqrt(2 * self.gamma)
+ seed = self.random_state
plpy.execute("""
drop table if exists {rd_weights};
select {self.schema_madlib}.matrix_random(
{row_dim}, {col_dim},
- 'mu=0, sigma={sigma}, seed={self.random_state}',
- 'normal',
- '{rd_weights}','row={rd_id}, val={rd_val}');
+ 'mu=0, sigma={sigma}, seed={seed}, temp_out=true',
+ 'normal', '{rd_weights}',
+ 'row={rd_id}, val={rd_val}');
""".format(**locals()))
return rd_weights
def _random_offsets(self, row_dim, col_dim, rd_id, rd_val):
rd_offset = unique_string(desp='random_offsets')
max_ = 2 * pi
+ seed = self.random_state
plpy.execute("""
drop table if exists {rd_offset};
select {self.schema_madlib}.matrix_random(
{row_dim}, {col_dim},
- 'min=0, max={max_}, seed={self.random_state}',
- 'uniform',
- '{rd_offset}','row={rd_id}, val={rd_val}');
+ 'min=0, max={max_}, seed={seed}, temp_out=true',
+ 'uniform', '{rd_offset}',
+ 'row={rd_id}, val={rd_val}');
""".format(**locals()))
return rd_offset
@@ -538,6 +541,7 @@ class GaussianKernel(GaussianKernelBase):
{id_col},
{grouping_col}
from {source_table}
+ WHERE not {schema_madlib}.array_contains_null({independent_varname})
""".format(**locals())
plpy.execute(run_sql)
source_table = source_with_id
@@ -688,6 +692,7 @@ class GaussianKernelInMemory(GaussianKernelBase):
{id_col},
{grouping_col}
from {source_table}
+ WHERE not {schema_madlib}.array_contains_null({independent_varname})
) q
cross join (select {self.rd_val} from {self.rd_weights}) as rw
cross join (select {self.rd_val} from {self.rd_offset}) as ro