You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@madlib.apache.org by GitBox <gi...@apache.org> on 2019/06/05 23:10:24 UTC

[GitHub] [madlib] fmcquillan99 edited a comment on issue #403: SVM: Fix class weights when specified as a mapping

fmcquillan99 edited a comment on issue #403: SVM: Fix class weights when specified as a mapping
URL: https://github.com/apache/madlib/pull/403#issuecomment-499288758
 
 
   ```
   DROP TABLE IF EXISTS houses;
   CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
               size INT, lot INT, price2 TEXT);
   INSERT INTO houses VALUES
     (1 ,  590 ,       2 ,    1 ,  50000 ,  770 , 22100, 'lt100k'),
     (2 , 1050 ,       3 ,    2 ,  85000 , 1410 , 12000, 'lt100k'),
     (3 ,   20 ,       3 ,    1 ,  22500 , 1060 ,  3500, 'lt100k'),
     (4 ,  870 ,       2 ,    2 ,  90000 , 1300 , 17500, 'lt100k'),
     (5 , 1320 ,       3 ,    2 , 133000 , 1500 , 30000, 'gt100k'),
     (6 , 1350 ,       2 ,    1 ,  90500 ,  820 , 25700, 'lt100k'),
     (7 , 2790 ,       3 ,  2.5 , 260000 , 2130 , 25000, 'gt100k'),
     (8 ,  680 ,       2 ,    1 , 142500 , 1170 , 22000, 'gt100k'),
     (9 , 1840 ,       3 ,    2 , 160000 , 1500 , 19000, 'gt100k'),
    (10 , 3680 ,       4 ,    2 , 240000 , 2790 , 20000, 'gt100k'),
    (11 , 1660 ,       3 ,    1 ,  87000 , 1030 , 17500, 'lt100k'),
    (12 , 1620 ,       3 ,    2 , 118600 , 1250 , 20000, 'gt100k'),
    (13 , 3100 ,       3 ,    2 , 140000 , 1760 , 38000, 'gt100k'),
    (14 , 2070 ,       2 ,    3 , 148000 , 1550 , 14000, 'gt100k'),
    (15 ,  650 ,       3 ,  1.5 ,  65000 , 1450 , 12000, 'lt100k');
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price < 150000',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight=balanced'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   -[ RECORD 1 ]------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   coef               | {0.714844965488907,0.250318127518095,-2.38928383420753,0.496511319241991,-0.732968736678418,0.405804203439028,-0.382933445051466,1.03493605457998,1.36269794687058,1.34976308449158}
   loss               | 0.573568156888657
   norm_of_gradient   | 1.31262747172053
   num_iterations     | 176
   num_rows_processed | 15
   num_rows_skipped   | 0
   dep_var_mapping    | {f,t}
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price < 150000',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight={true:1, false:3}'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   -[ RECORD 1 ]------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   coef               | {0.930683028668259,0.505925477275442,-2.56846778339476,0.556405030919901,-1.07556735802439,0.325697706517631,-0.872494815785118,1.39148131884382,1.41452211430188,2.02648079470983}
   loss               | 0.526907702801827
   norm_of_gradient   | 1.73625539167368
   num_iterations     | 148
   num_rows_processed | 15
   num_rows_skipped   | 0
   dep_var_mapping    | {f,t}
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price < 150000',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight={false:3}'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   -[ RECORD 1 ]------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   coef               | {0.812714358084175,0.516611089846274,-2.65310766450326,0.67665375700828,-1.1073276109641,0.303844438423391,-0.958238313229976,1.30096201645626,1.44218799133577,2.20663265611754}
   loss               | 0.518638591961475
   norm_of_gradient   | 1.45988195776794
   num_iterations     | 183
   num_rows_processed | 15
   num_rows_skipped   | 0
   dep_var_mapping    | {f,t}
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price2',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3}'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   -[ RECORD 1 ]------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
   coef               | {-1.88839942488571,1.99106255876569,-0.795671096255553,2.1633019248309,2.92527682698349,-0.168696166478298,2.5589349412574,3.40128219835538,0.860916227855357,3.07703136724834}
   loss               | 0
   norm_of_gradient   | 0
   num_iterations     | 200
   num_rows_processed | 15
   num_rows_skipped   | 0
   dep_var_mapping    | {gt100k,lt100k}
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price2',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight={qqq:1, lt100k:3}'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   ERROR:  plpy.Error: SVM: Key 'qqq' in '{qqq:1, lt100k:3}' is not a valid class label. (plpython.c:5038)
   CONTEXT:  Traceback (most recent call last):
     PL/Python function "svm_classification", line 24, in <module>
       return svm.svm(**globals())
     PL/Python function "svm_classification", line 816, in svm
     PL/Python function "svm_classification", line 1068, in _svm_parsed_params
     PL/Python function "svm_classification", line 982, in _compute_class_weight_sql
     PL/Python function "svm_classification", line 96, in _assert
   PL/Python function "svm_classification"
   madlib=# \x on
   Expanded display is on.
   madlib=# SELECT * FROM houses_svm_gaussian;
   ERROR:  relation "houses_svm_gaussian" does not exist
   LINE 1: SELECT * FROM houses_svm_gaussian;
   ```
   
   ```
   DROP TABLE IF EXISTS houses_svm_gaussian, houses_svm_gaussian_summary, houses_svm_gaussian_random;
   SELECT madlib.svm_classification( 'houses',
                                     'houses_svm_gaussian',
                                     'price2',
                                     'ARRAY[1, tax, bath, size]',
                                     'gaussian',
                                     'n_components=10',
                                     '',
                                     'init_stepsize=1, max_iter=200, class_weight={gt100k:1, lt100k:3, qqq:2}'
                              );
   \x on
   SELECT * FROM houses_svm_gaussian;
   
   ERROR:  plpy.Error: SVM: Only binary classification is supported. The class_weight param should have at least one and at most two labels in it. (plpython.c:5038)
   CONTEXT:  Traceback (most recent call last):
     PL/Python function "svm_classification", line 24, in <module>
       return svm.svm(**globals())
     PL/Python function "svm_classification", line 816, in svm
     PL/Python function "svm_classification", line 1068, in _svm_parsed_params
     PL/Python function "svm_classification", line 966, in _compute_class_weight_sql
     PL/Python function "svm_classification", line 96, in _assert
   PL/Python function "svm_classification"
   madlib=# \x on
   Expanded display is on.
   madlib=# SELECT * FROM houses_svm_gaussian;
   ERROR:  relation "houses_svm_gaussian" does not exist
   LINE 1: SELECT * FROM houses_svm_gaussian;
   ```
   
   LGTM

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services