You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@madlib.apache.org by GitBox <gi...@apache.org> on 2019/06/05 23:10:24 UTC
[GitHub] [madlib] fmcquillan99 edited a comment on issue #403: SVM: Fix class weights when specified as a mapping
fmcquillan99 edited a comment on issue #403: SVM: Fix class weights when specified as a mapping
URL: https://github.com/apache/madlib/pull/403#issuecomment-499288758
```
-- Fixture: 15 sample houses used by the SVM class_weight checks.
-- price2 is a text class label that tracks price: every row below with
-- price under 100000 is labelled 'lt100k', every other row 'gt100k'.
DROP TABLE IF EXISTS houses;
CREATE TABLE houses (
    id      INT,
    tax     INT,
    bedroom INT,
    bath    FLOAT,
    price   INT,
    size    INT,
    lot     INT,
    price2  TEXT
);
-- Explicit column list: the load must not depend on the physical column order.
INSERT INTO houses (id, tax, bedroom, bath, price, size, lot, price2) VALUES
    ( 1,  590, 2, 1,    50000,  770, 22100, 'lt100k'),
    ( 2, 1050, 3, 2,    85000, 1410, 12000, 'lt100k'),
    ( 3,   20, 3, 1,    22500, 1060,  3500, 'lt100k'),
    ( 4,  870, 2, 2,    90000, 1300, 17500, 'lt100k'),
    ( 5, 1320, 3, 2,   133000, 1500, 30000, 'gt100k'),
    ( 6, 1350, 2, 1,    90500,  820, 25700, 'lt100k'),
    ( 7, 2790, 3, 2.5, 260000, 2130, 25000, 'gt100k'),
    ( 8,  680, 2, 1,   142500, 1170, 22000, 'gt100k'),
    ( 9, 1840, 3, 2,   160000, 1500, 19000, 'gt100k'),
    (10, 3680, 4, 2,   240000, 2790, 20000, 'gt100k'),
    (11, 1660, 3, 1,    87000, 1030, 17500, 'lt100k'),
    (12, 1620, 3, 2,   118600, 1250, 20000, 'gt100k'),
    (13, 3100, 3, 2,   140000, 1760, 38000, 'gt100k'),
    (14, 2070, 2, 3,   148000, 1550, 14000, 'gt100k'),
    (15,  650, 3, 1.5,  65000, 1450, 12000, 'lt100k');
```
```
-- Case 1: gaussian-kernel SVM on a boolean target, class_weight=balanced.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price < 150000',             -- dependent variable: boolean expression
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight=balanced'  -- optimizer parameters
);
-- Switch psql to expanded display, then show the fitted model row.
\x on
SELECT * FROM houses_svm_gaussian;
-[ RECORD 1 ]------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
coef | {0.714844965488907,0.250318127518095,-2.38928383420753,0.496511319241991,-0.732968736678418,0.405804203439028,-0.382933445051466,1.03493605457998,1.36269794687058,1.34976308449158}
loss | 0.573568156888657
norm_of_gradient | 1.31262747172053
num_iterations | 176
num_rows_processed | 15
num_rows_skipped | 0
dep_var_mapping | {f,t}
```
```
-- Case 2: same model, class_weight given as a mapping over both boolean labels.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price < 150000',             -- dependent variable: boolean expression
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight={true:1, false:3}'  -- optimizer parameters
);
-- Switch psql to expanded display, then show the fitted model row.
\x on
SELECT * FROM houses_svm_gaussian;
-[ RECORD 1 ]------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
coef | {0.930683028668259,0.505925477275442,-2.56846778339476,0.556405030919901,-1.07556735802439,0.325697706517631,-0.872494815785118,1.39148131884382,1.41452211430188,2.02648079470983}
loss | 0.526907702801827
norm_of_gradient | 1.73625539167368
num_iterations | 148
num_rows_processed | 15
num_rows_skipped | 0
dep_var_mapping | {f,t}
```
```
-- Case 3: class_weight mapping that names only one of the two boolean labels.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price < 150000',             -- dependent variable: boolean expression
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight={false:3}'  -- optimizer parameters
);
-- Switch psql to expanded display, then show the fitted model row.
\x on
SELECT * FROM houses_svm_gaussian;
-[ RECORD 1 ]------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
coef | {0.812714358084175,0.516611089846274,-2.65310766450326,0.67665375700828,-1.1073276109641,0.303844438423391,-0.958238313229976,1.30096201645626,1.44218799133577,2.20663265611754}
loss | 0.518638591961475
norm_of_gradient | 1.45988195776794
num_iterations | 183
num_rows_processed | 15
num_rows_skipped | 0
dep_var_mapping | {f,t}
```
```
-- Case 4: text target (price2) with class_weight keyed by its two labels.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price2',                     -- dependent variable: text label column
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3}'  -- optimizer parameters
);
-- Switch psql to expanded display, then show the fitted model row.
\x on
SELECT * FROM houses_svm_gaussian;
-[ RECORD 1 ]------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
coef | {-1.88839942488571,1.99106255876569,-0.795671096255553,2.1633019248309,2.92527682698349,-0.168696166478298,2.5589349412574,3.40128219835538,0.860916227855357,3.07703136724834}
loss | 0
norm_of_gradient | 0
num_iterations | 200
num_rows_processed | 15
num_rows_skipped | 0
dep_var_mapping | {gt100k,lt100k}
```
```
-- Case 5 (negative): class_weight names qqq, which is not a label in price2.
-- The call is expected to raise an error, as the output below shows.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price2',                     -- dependent variable: text label column
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight={qqq:1, lt100k:3}'  -- optimizer parameters
);
-- The model table was dropped above and is not created when training fails,
-- so this inspection query reports a missing relation.
\x on
SELECT * FROM houses_svm_gaussian;
ERROR: plpy.Error: SVM: Key 'qqq' in '{qqq:1, lt100k:3}' is not a valid class label. (plpython.c:5038)
CONTEXT: Traceback (most recent call last):
PL/Python function "svm_classification", line 24, in <module>
return svm.svm(**globals())
PL/Python function "svm_classification", line 816, in svm
PL/Python function "svm_classification", line 1068, in _svm_parsed_params
PL/Python function "svm_classification", line 982, in _compute_class_weight_sql
PL/Python function "svm_classification", line 96, in _assert
PL/Python function "svm_classification"
madlib=# \x on
Expanded display is on.
madlib=# SELECT * FROM houses_svm_gaussian;
ERROR: relation "houses_svm_gaussian" does not exist
LINE 1: SELECT * FROM houses_svm_gaussian;
```
```
-- Case 6 (negative): class_weight lists three labels.
-- The call is expected to raise an error, as the output below shows.
-- Remove the model table and its companion tables left by any earlier run.
DROP TABLE IF EXISTS
    houses_svm_gaussian,
    houses_svm_gaussian_summary,
    houses_svm_gaussian_random;
SELECT madlib.svm_classification(
    'houses',                     -- source table
    'houses_svm_gaussian',        -- model output table
    'price2',                     -- dependent variable: text label column
    'ARRAY[1, tax, bath, size]',  -- independent variables: constant 1 plus three features
    'gaussian',                   -- kernel function
    'n_components=10',            -- kernel parameters
    '',                           -- grouping columns (none)
    'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3, qqq:2}'  -- optimizer parameters
);
-- The model table was dropped above and is not created when training fails,
-- so this inspection query reports a missing relation.
\x on
SELECT * FROM houses_svm_gaussian;
ERROR: plpy.Error: SVM: Only binary classification is supported. The class_weight param should have at least one and at most two labels in it. (plpython.c:5038)
CONTEXT: Traceback (most recent call last):
PL/Python function "svm_classification", line 24, in <module>
return svm.svm(**globals())
PL/Python function "svm_classification", line 816, in svm
PL/Python function "svm_classification", line 1068, in _svm_parsed_params
PL/Python function "svm_classification", line 966, in _compute_class_weight_sql
PL/Python function "svm_classification", line 96, in _assert
PL/Python function "svm_classification"
madlib=# \x on
Expanded display is on.
madlib=# SELECT * FROM houses_svm_gaussian;
ERROR: relation "houses_svm_gaussian" does not exist
LINE 1: SELECT * FROM houses_svm_gaussian;
```
LGTM
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services