You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@madlib.apache.org by ri...@apache.org on 2017/04/27 19:12:52 UTC
incubator-madlib git commit: DT: Update error message for invalid num_splits
Repository: incubator-madlib
Updated Branches:
refs/heads/master a3d54be66 -> c4fd91e16
DT: Update error message for invalid num_splits
Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/c4fd91e1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/c4fd91e1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/c4fd91e1
Branch: refs/heads/master
Commit: c4fd91e16827a5f8be4051eb3ea0d311d3e957f2
Parents: a3d54be
Author: Rahul Iyer <ri...@apache.org>
Authored: Thu Apr 27 12:12:48 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Thu Apr 27 12:12:48 2017 -0700
----------------------------------------------------------------------
src/modules/recursive_partitioning/feature_encoding.cpp | 8 ++++++--
.../recursive_partitioning/test/decision_tree.sql_in | 12 +++++++++---
2 files changed, 15 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/c4fd91e1/src/modules/recursive_partitioning/feature_encoding.cpp
----------------------------------------------------------------------
diff --git a/src/modules/recursive_partitioning/feature_encoding.cpp b/src/modules/recursive_partitioning/feature_encoding.cpp
index 20856e2..3b0a452 100644
--- a/src/modules/recursive_partitioning/feature_encoding.cpp
+++ b/src/modules/recursive_partitioning/feature_encoding.cpp
@@ -39,7 +39,7 @@ dst_compute_con_splits_transition::run(AnyType &args){
if (!state.empty() && state.num_rows >= state.buff_size) {
return args[0];
}
- // NULL-handling is done in python to make sure consistency b/w
+ // NULLs are handled by caller to ensure consistency between
// feature encoding and tree training
MappedColumnVector con_features = args[1].getAs<MappedColumnVector>();
@@ -71,8 +71,12 @@ dst_compute_con_splits_final::run(AnyType &args){
if (state.num_rows <= state.num_splits) {
std::stringstream error_msg;
+ // In message below, add 1 to state.num_splits since the meaning of
+ // "splits" for the caller is the number of quantiles, whereas
+ // "splits" in this function is the number of values dividing the data
+ // into quantiles.
error_msg << "Decision tree error: Number of splits ("
- << state.num_splits
+ << state.num_splits + 1
<< ") is larger than the number of records ("
<< state.num_rows << ")";
throw std::runtime_error(error_msg.str());
http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/c4fd91e1/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
index 28a4647..dd861a0 100644
--- a/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
+++ b/src/ports/postgres/modules/recursive_partitioning/test/decision_tree.sql_in
@@ -287,7 +287,7 @@ SELECT tree_train('dt_golf'::text, -- source table
'train_output'::text, -- output model table
'id'::text, -- id column
'temperature::double precision'::text, -- response
- 'humidity, windy'::text, -- features
+ '"OUTLOOK", humidity, windy'::text, -- features
NULL::text, -- exclude columns
'gini'::text, -- split criterion
'class'::text, -- grouping
@@ -301,13 +301,19 @@ SELECT tree_train('dt_golf'::text, -- source table
SELECT _print_decision_tree(tree) from train_output;
SELECT tree_display('train_output', False);
-SELECT tree_predict('train_output', 'dt_golf', 'predict_output');
+
+CREATE TABLE dt_golf2 as
+SELECT * FROM dt_golf
+UNION
+SELECT 15 as id, 'humid' as "OUTLOOK", 71 as temperature, 80 as humidity,
+ true as windy, 'Don''t Play' as class;
+SELECT tree_predict('train_output', 'dt_golf2', 'predict_output');
\x off
SELECT *
FROM
predict_output
JOIN
- dt_golf
+ dt_golf2
USING (id);
\x on
select * from train_output;